author	Robie Basak <robie@justgohome.co.uk>	2012-10-11 20:31:06 +0100
committer	James Saryerwinnie <js@jamesls.com>	2012-11-08 11:28:52 -0800
commit	87685d9623080a727f3fac36df1aa3dd1a2a979d (patch)
tree	1007cf4e26ee2448720b712312dd92d6ebc21ed5 /boto/glacier
parent	9ecaf83bf9b6982d1ea9c9b135f658fe9a99503e (diff)
download	boto-87685d9623080a727f3fac36df1aa3dd1a2a979d.tar.gz
Add glacier resume_file_upload
Diffstat (limited to 'boto/glacier')
-rw-r--r--	boto/glacier/writer.py	58
1 file changed, 58 insertions, 0 deletions
diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py
index 73b4725e..8563c654 100644
--- a/boto/glacier/writer.py
+++ b/boto/glacier/writer.py
@@ -190,6 +190,23 @@ class _Uploader(object):
         response.read()
         self._uploaded_size += len(part_data)

+    def skip_part(self, part_index, part_tree_hash, part_length):
+        """Skip uploading of a part.
+
+        The final close call needs to calculate the tree hash and total size
+        of all uploaded data, so this is the mechanism for resume
+        functionality to provide it without actually uploading the data again.
+
+        :param part_index: part number where 0 is the first part
+        :param part_tree_hash: binary tree_hash of part being skipped
+        :param part_length: length of part being skipped
+
+        """
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        self._insert_tree_hash(part_index, part_tree_hash)
+        self._uploaded_size += part_length
+
     def close(self):
         if self.closed:
             return
@@ -205,6 +222,47 @@ class _Uploader(object):
         self.closed = True


+def generate_parts_from_fobj(fobj, part_size):
+    data = fobj.read(part_size)
+    while data:
+        yield data
+        data = fobj.read(part_size)
+
+
+def resume_file_upload(vault, upload_id, part_size, fobj, part_hash_map,
+                       chunk_size=_ONE_MEGABYTE):
+    """Resume upload of a file already part-uploaded to Glacier.
+
+    The resumption of an upload where the part-uploaded section is empty is a
+    valid degenerate case that this function can handle. In this case,
+    part_hash_map should be an empty dict.
+
+    :param vault: boto.glacier.vault.Vault object.
+    :param upload_id: existing Glacier upload id of upload being resumed.
+    :param part_size: part size of existing upload.
+    :param fobj: file object containing local data to resume. This must read
+        from the start of the entire upload, not just from the point being
+        resumed. Use fobj.seek(0) to achieve this if necessary.
+    :param part_hash_map: {part_index: part_tree_hash, ...} of data already
+        uploaded. Each supplied part_tree_hash will be verified and the part
+        re-uploaded if there is a mismatch.
+    :param chunk_size: chunk size of tree hash calculation. This must be
+        1 MiB for Amazon.
+
+    """
+    uploader = _Uploader(vault, upload_id, part_size, chunk_size)
+    for part_index, part_data in enumerate(
+            generate_parts_from_fobj(fobj, part_size)):
+        part_tree_hash = tree_hash(chunk_hashes(part_data, chunk_size))
+        if (part_index not in part_hash_map or
+                part_hash_map[part_index] != part_tree_hash):
+            uploader.upload_part(part_index, part_data)
+        else:
+            uploader.skip_part(part_index, part_tree_hash, len(part_data))
+    uploader.close()
+    return uploader.archive_id
+
+
 class Writer(object):
     """
     Presents a file-like object for writing to a Amazon Glacier
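
A rough usage sketch for the new helper (not part of the commit): the region, vault name, file name, upload id, part size, and the part_hash_map contents below are placeholders, and connection setup assumes the existing boto.glacier.connect_to_region / Layer2.get_vault calls.

    import boto.glacier
    from boto.glacier.writer import resume_file_upload

    # Hypothetical values: the upload id and part size must come from the
    # interrupted initiate_multipart_upload call, and part_hash_map is rebuilt
    # from whatever record of already-uploaded parts the caller kept
    # ({part_index: binary tree hash}); an empty dict restarts from part 0.
    layer2 = boto.glacier.connect_to_region('us-east-1')
    vault = layer2.get_vault('my-vault')
    upload_id = 'EXAMPLE-UPLOAD-ID'
    part_size = 4 * 1024 * 1024   # must match the original upload's part size
    part_hash_map = {}

    with open('backup.tar.gz', 'rb') as fobj:
        fobj.seek(0)  # resume_file_upload reads from the start of the file
        archive_id = resume_file_upload(vault, upload_id, part_size, fobj,
                                        part_hash_map)
    print(archive_id)

Parts whose tree hash matches an entry in part_hash_map are passed to skip_part and only counted toward the final size and tree hash; mismatched or missing parts are re-uploaded via upload_part.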