From 840292841f4495a79a037f81a26d6b3f51e7cb8c Mon Sep 17 00:00:00 2001 From: Adam Coldrick Date: Thu, 4 Jun 2015 15:25:33 +0000 Subject: Move xfer-hole and recv-hole into extensions/ Change-Id: I46ed5f3ec85f9662bebac592eff7a6eb6d628f28 --- extensions/recv-hole | 158 +++++++++++++++++++++++++++++++++++++++++++++++++++ extensions/xfer-hole | 137 ++++++++++++++++++++++++++++++++++++++++++++ recv-hole | 158 --------------------------------------------------- xfer-hole | 137 -------------------------------------------- 4 files changed, 295 insertions(+), 295 deletions(-) create mode 100755 extensions/recv-hole create mode 100755 extensions/xfer-hole delete mode 100755 recv-hole delete mode 100755 xfer-hole diff --git a/extensions/recv-hole b/extensions/recv-hole new file mode 100755 index 00000000..fe69f304 --- /dev/null +++ b/extensions/recv-hole @@ -0,0 +1,158 @@ +#!/bin/sh +# +# Copyright (C) 2014-2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# =*= License: GPL-2 =*= + + +# Receive a data stream describing a sparse file, and reproduce it, +# either to a named file or stdout. +# +# The data stream is simple: it's a sequence of DATA or HOLE records: +# +# DATA +# 123 +# <123 bytes of binary data, NOT including newline at the end> +# +# HOLE +# 123 +# +# This shell script can be executed over ssh (given to ssh as an arguemnt, +# with suitable escaping) on a different computer.
This allows a large +# sparse file (e.g., disk image) be transferred quickly. +# +# This script should be called in one of the following ways: +# +# recv-hole file FILENAME +# recv-hole vbox FILENAME DISKSIZE +# +# In both cases, FILENAME is the pathname of the disk image on the +# receiving end. DISKSIZE is the size of the disk image in bytes. The +# first form is used when transferring a disk image to become an +# identical file on the receiving end. +# +# The second form is used when the disk image should be converted for +# use by VirtualBox. In this case, we want to avoid writing a +# temporary file on disk, and then calling the VirtualBox VBoxManage +# tool to do the conversion, since that would involve large amounts of +# unnecessary I/O and disk usage. Instead we pipe the file directly to +# VBoxManage, avoiding those issues. The piping is done here in this +# script, instead of in the caller, to make it easier to run things +# over ssh. +# +# However, since it's not possible seek in a Unix pipe, we have to +# explicitly write the zeroes into the pipe. This is not +# super-efficient, but the way to avoid that would be to avoid sending +# a sparse file, and do the conversion to a VDI on the sending end. +# That is out of scope for xfer-hole and recv-hole. 
+ + +set -eu + + +die() +{ + echo "$@" 1>&2 + exit 1 +} + + +recv_hole_to_file() +{ + local n + + read n + truncate --size "+$n" "$1" +} + + +recv_data_to_file() +{ + local n + read n + + local blocksize=1048576 + local blocks=$(($n / $blocksize)) + local extra=$(($n % $blocksize)) + + xfer_data_to_stdout "$blocksize" "$blocks" >> "$1" + xfer_data_to_stdout 1 "$extra" >> "$1" +} + + +recv_hole_to_stdout() +{ + local n + read n + (echo "$n"; cat /dev/zero) | recv_data_to_stdout +} + + +recv_data_to_stdout() +{ + local n + read n + + local blocksize=1048576 + local blocks=$(($n / $blocksize)) + local extra=$(($n % $blocksize)) + + xfer_data_to_stdout "$blocksize" "$blocks" + xfer_data_to_stdout 1 "$extra" +} + + +xfer_data_to_stdout() +{ + local log="$(mktemp)" + if ! dd "bs=$1" count="$2" iflag=fullblock status=noxfer 2> "$log" + then + cat "$log" 1>&2 + rm -f "$log" + exit 1 + else + rm -f "$log" + fi +} + + +type="$1" +case "$type" in + file) + output="$2" + truncate --size=0 "$output" + while read what + do + case "$what" in + DATA) recv_data_to_file "$output" ;; + HOLE) recv_hole_to_file "$output" ;; + *) die "Unknown instruction: $what" ;; + esac + done + ;; + vbox) + output="$2" + disk_size="$3" + while read what + do + case "$what" in + DATA) recv_data_to_stdout ;; + HOLE) recv_hole_to_stdout ;; + *) die "Unknown instruction: $what" ;; + esac + done | + VBoxManage convertfromraw stdin "$output" "$disk_size" + ;; +esac diff --git a/extensions/xfer-hole b/extensions/xfer-hole new file mode 100755 index 00000000..91f1be01 --- /dev/null +++ b/extensions/xfer-hole @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# +# Send a sparse file more space-efficiently. +# See recv-hole for a description of the protocol. +# +# Note that xfer-hole requires a version of Linux with support for +# SEEK_DATA and SEEK_HOLE. 
+# +# +# Copyright (C) 2014-2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see <http://www.gnu.org/licenses/>. +# +# =*= License: GPL-2 =*= + + + +import errno +import os +import sys + + +SEEK_DATA = 3 +SEEK_HOLE = 4 + + +filename = sys.argv[1] +fd = os.open(filename, os.O_RDONLY) +pos = 0 + + +DATA = 'data' +HOLE = 'hole' +EOF = 'eof' + + +def safe_lseek(fd, pos, whence): + try: + return os.lseek(fd, pos, whence) + except OSError as e: + if e.errno == errno.ENXIO: + return -1 + raise + + +def current_data_or_pos(fd, pos): + length = safe_lseek(fd, 0, os.SEEK_END) + next_data = safe_lseek(fd, pos, SEEK_DATA) + next_hole = safe_lseek(fd, pos, SEEK_HOLE) + + if pos == length: + return EOF, pos + elif pos == next_data: + return DATA, pos + elif pos == next_hole: + return HOLE, pos + else: + assert False, \ + ("Do not understand: pos=%d next_data=%d next_hole=%d" % + (pos, next_data, next_hole)) + + +def next_data_or_hole(fd, pos): + length = safe_lseek(fd, 0, os.SEEK_END) + next_data = safe_lseek(fd, pos, SEEK_DATA) + next_hole = safe_lseek(fd, pos, SEEK_HOLE) + + if pos == length: + return EOF, pos + elif pos == next_data: + # We are at data. + if next_hole == -1 or next_hole == length: + return EOF, length + else: + return HOLE, next_hole + elif pos == next_hole: + # We are at a hole.
+ if next_data == -1 or next_data == length: + return EOF, length + else: + return DATA, next_data + else: + assert False, \ + ("Do not understand: pos=%d next_data=%d next_hole=%d" % + (pos, next_data, next_hole)) + + +def find_data_and_holes(fd): + pos = safe_lseek(fd, 0, os.SEEK_CUR) + + kind, pos = current_data_or_pos(fd, pos) + while kind != EOF: + yield kind, pos + kind, pos = next_data_or_hole(fd, pos) + yield kind, pos + + +def make_xfer_instructions(fd): + prev_kind = None + prev_pos = None + for kind, pos in find_data_and_holes(fd): + if prev_kind == DATA: + yield (DATA, prev_pos, pos) + elif prev_kind == HOLE: + yield (HOLE, prev_pos, pos) + prev_kind = kind + prev_pos = pos + + +def copy_slice_from_file(to, fd, start, end): + safe_lseek(fd, start, os.SEEK_SET) + nbytes = end - start + max_at_a_time = 1024**2 + while nbytes > 0: + data = os.read(fd, min(nbytes, max_at_a_time)) + if not data: + break + to.write(data) + nbytes -= len(data) + + +for kind, start, end in make_xfer_instructions(fd): + if kind == HOLE: + sys.stdout.write('HOLE\n%d\n' % (end - start)) + elif kind == DATA: + sys.stdout.write('DATA\n%d\n' % (end - start)) + copy_slice_from_file(sys.stdout, fd, start, end) diff --git a/recv-hole b/recv-hole deleted file mode 100755 index fe69f304..00000000 --- a/recv-hole +++ /dev/null @@ -1,158 +0,0 @@ -#!/bin/sh -# -# Copyright (C) 2014-2015 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>.
-# -# =*= License: GPL-2 =*= - - -# Receive a data stream describing a sparse file, and reproduce it, -# either to a named file or stdout. -# -# The data stream is simple: it's a sequence of DATA or HOLE records: -# -# DATA -# 123 -# <123 bytes of binary data, NOT including newline at the end> -# -# HOLE -# 123 -# -# This shell script can be executed over ssh (given to ssh as an arguemnt, -# with suitable escaping) on a different computer. This allows a large -# sparse file (e.g., disk image) be transferred quickly. -# -# This script should be called in one of the following ways: -# -# recv-hole file FILENAME -# recv-hole vbox FILENAME DISKSIZE -# -# In both cases, FILENAME is the pathname of the disk image on the -# receiving end. DISKSIZE is the size of the disk image in bytes. The -# first form is used when transferring a disk image to become an -# identical file on the receiving end. -# -# The second form is used when the disk image should be converted for -# use by VirtualBox. In this case, we want to avoid writing a -# temporary file on disk, and then calling the VirtualBox VBoxManage -# tool to do the conversion, since that would involve large amounts of -# unnecessary I/O and disk usage. Instead we pipe the file directly to -# VBoxManage, avoiding those issues. The piping is done here in this -# script, instead of in the caller, to make it easier to run things -# over ssh. -# -# However, since it's not possible seek in a Unix pipe, we have to -# explicitly write the zeroes into the pipe. This is not -# super-efficient, but the way to avoid that would be to avoid sending -# a sparse file, and do the conversion to a VDI on the sending end. -# That is out of scope for xfer-hole and recv-hole. 
- - -set -eu - - -die() -{ - echo "$@" 1>&2 - exit 1 -} - - -recv_hole_to_file() -{ - local n - - read n - truncate --size "+$n" "$1" -} - - -recv_data_to_file() -{ - local n - read n - - local blocksize=1048576 - local blocks=$(($n / $blocksize)) - local extra=$(($n % $blocksize)) - - xfer_data_to_stdout "$blocksize" "$blocks" >> "$1" - xfer_data_to_stdout 1 "$extra" >> "$1" -} - - -recv_hole_to_stdout() -{ - local n - read n - (echo "$n"; cat /dev/zero) | recv_data_to_stdout -} - - -recv_data_to_stdout() -{ - local n - read n - - local blocksize=1048576 - local blocks=$(($n / $blocksize)) - local extra=$(($n % $blocksize)) - - xfer_data_to_stdout "$blocksize" "$blocks" - xfer_data_to_stdout 1 "$extra" -} - - -xfer_data_to_stdout() -{ - local log="$(mktemp)" - if ! dd "bs=$1" count="$2" iflag=fullblock status=noxfer 2> "$log" - then - cat "$log" 1>&2 - rm -f "$log" - exit 1 - else - rm -f "$log" - fi -} - - -type="$1" -case "$type" in - file) - output="$2" - truncate --size=0 "$output" - while read what - do - case "$what" in - DATA) recv_data_to_file "$output" ;; - HOLE) recv_hole_to_file "$output" ;; - *) die "Unknown instruction: $what" ;; - esac - done - ;; - vbox) - output="$2" - disk_size="$3" - while read what - do - case "$what" in - DATA) recv_data_to_stdout ;; - HOLE) recv_hole_to_stdout ;; - *) die "Unknown instruction: $what" ;; - esac - done | - VBoxManage convertfromraw stdin "$output" "$disk_size" - ;; -esac diff --git a/xfer-hole b/xfer-hole deleted file mode 100755 index 91f1be01..00000000 --- a/xfer-hole +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python -# -# Send a sparse file more space-efficiently. -# See recv-hole for a description of the protocol. -# -# Note that xfer-hole requires a version of Linux with support for -# SEEK_DATA and SEEK_HOLE. 
-# -# -# Copyright (C) 2014-2015 Codethink Limited -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program. If not, see <http://www.gnu.org/licenses/>. -# -# =*= License: GPL-2 =*= - - - -import errno -import os -import sys - - -SEEK_DATA = 3 -SEEK_HOLE = 4 - - -filename = sys.argv[1] -fd = os.open(filename, os.O_RDONLY) -pos = 0 - - -DATA = 'data' -HOLE = 'hole' -EOF = 'eof' - - -def safe_lseek(fd, pos, whence): - try: - return os.lseek(fd, pos, whence) - except OSError as e: - if e.errno == errno.ENXIO: - return -1 - raise - - -def current_data_or_pos(fd, pos): - length = safe_lseek(fd, 0, os.SEEK_END) - next_data = safe_lseek(fd, pos, SEEK_DATA) - next_hole = safe_lseek(fd, pos, SEEK_HOLE) - - if pos == length: - return EOF, pos - elif pos == next_data: - return DATA, pos - elif pos == next_hole: - return HOLE, pos - else: - assert False, \ - ("Do not understand: pos=%d next_data=%d next_hole=%d" % - (pos, next_data, next_hole)) - - -def next_data_or_hole(fd, pos): - length = safe_lseek(fd, 0, os.SEEK_END) - next_data = safe_lseek(fd, pos, SEEK_DATA) - next_hole = safe_lseek(fd, pos, SEEK_HOLE) - - if pos == length: - return EOF, pos - elif pos == next_data: - # We are at data. - if next_hole == -1 or next_hole == length: - return EOF, length - else: - return HOLE, next_hole - elif pos == next_hole: - # We are at a hole.
- if next_data == -1 or next_data == length: - return EOF, length - else: - return DATA, next_data - else: - assert False, \ - ("Do not understand: pos=%d next_data=%d next_hole=%d" % - (pos, next_data, next_hole)) - - -def find_data_and_holes(fd): - pos = safe_lseek(fd, 0, os.SEEK_CUR) - - kind, pos = current_data_or_pos(fd, pos) - while kind != EOF: - yield kind, pos - kind, pos = next_data_or_hole(fd, pos) - yield kind, pos - - -def make_xfer_instructions(fd): - prev_kind = None - prev_pos = None - for kind, pos in find_data_and_holes(fd): - if prev_kind == DATA: - yield (DATA, prev_pos, pos) - elif prev_kind == HOLE: - yield (HOLE, prev_pos, pos) - prev_kind = kind - prev_pos = pos - - -def copy_slice_from_file(to, fd, start, end): - safe_lseek(fd, start, os.SEEK_SET) - nbytes = end - start - max_at_a_time = 1024**2 - while nbytes > 0: - data = os.read(fd, min(nbytes, max_at_a_time)) - if not data: - break - to.write(data) - nbytes -= len(data) - - -for kind, start, end in make_xfer_instructions(fd): - if kind == HOLE: - sys.stdout.write('HOLE\n%d\n' % (end - start)) - elif kind == DATA: - sys.stdout.write('DATA\n%d\n' % (end - start)) - copy_slice_from_file(sys.stdout, fd, start, end) -- cgit v1.2.1