author	Robie Basak <robie@justgohome.co.uk>	2012-10-11 20:31:06 +0100
committer	James Saryerwinnie <js@jamesls.com>	2012-11-08 11:28:52 -0800
commit	87685d9623080a727f3fac36df1aa3dd1a2a979d (patch)
tree	1007cf4e26ee2448720b712312dd92d6ebc21ed5 /boto/glacier
parent	9ecaf83bf9b6982d1ea9c9b135f658fe9a99503e (diff)
download	boto-87685d9623080a727f3fac36df1aa3dd1a2a979d.tar.gz
Add glacier resume_file_upload
Diffstat (limited to 'boto/glacier')
-rw-r--r--	boto/glacier/writer.py	58
1 file changed, 58 insertions, 0 deletions
diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py
index 73b4725e..8563c654 100644
--- a/boto/glacier/writer.py
+++ b/boto/glacier/writer.py
@@ -190,6 +190,23 @@ class _Uploader(object):
         response.read()
         self._uploaded_size += len(part_data)

+    def skip_part(self, part_index, part_tree_hash, part_length):
+        """Skip uploading of a part.
+
+        The final close call needs to calculate the tree hash and total size
+        of all uploaded data, so this is the mechanism for resume
+        functionality to provide it without actually uploading the data again.
+
+        :param part_index: part number where 0 is the first part
+        :param part_tree_hash: binary tree_hash of part being skipped
+        :param part_length: length of part being skipped
+
+        """
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        self._insert_tree_hash(part_index, part_tree_hash)
+        self._uploaded_size += part_length
+
     def close(self):
         if self.closed:
             return
@@ -205,6 +222,47 @@ class _Uploader(object):
         self.closed = True


+def generate_parts_from_fobj(fobj, part_size):
+    data = fobj.read(part_size)
+    while data:
+        yield data
+        data = fobj.read(part_size)
+
+
+def resume_file_upload(vault, upload_id, part_size, fobj, part_hash_map,
+                       chunk_size=_ONE_MEGABYTE):
+    """Resume upload of a file already part-uploaded to Glacier.
+
+    The resumption of an upload where the part-uploaded section is empty is a
+    valid degenerate case that this function can handle. In this case,
+    part_hash_map should be an empty dict.
+
+    :param vault: boto.glacier.vault.Vault object.
+    :param upload_id: existing Glacier upload id of upload being resumed.
+    :param part_size: part size of existing upload.
+    :param fobj: file object containing local data to resume. This must read
+        from the start of the entire upload, not just from the point being
+        resumed. Use fobj.seek(0) to achieve this if necessary.
+    :param part_hash_map: {part_index: part_tree_hash, ...} of data already
+        uploaded. Each supplied part_tree_hash will be verified and the part
+        re-uploaded if there is a mismatch.
+    :param chunk_size: chunk size of tree hash calculation. This must be
+        1 MiB for Amazon.
+
+    """
+    uploader = _Uploader(vault, upload_id, part_size, chunk_size)
+    for part_index, part_data in enumerate(
+            generate_parts_from_fobj(fobj, part_size)):
+        part_tree_hash = tree_hash(chunk_hashes(part_data, chunk_size))
+        if (part_index not in part_hash_map or
+                part_hash_map[part_index] != part_tree_hash):
+            uploader.upload_part(part_index, part_data)
+        else:
+            uploader.skip_part(part_index, part_tree_hash, len(part_data))
+    uploader.close()
+    return uploader.archive_id
+
+
 class Writer(object):
     """
     Presents a file-like object for writing to a Amazon Glacier
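
A rough usage sketch for the new helper (not part of the commit): the region, vault name, file name, upload id, part size, and the part_hash_map contents below are placeholders, and connection setup assumes the existing boto.glacier.connect_to_region / Layer2.get_vault calls.

    import boto.glacier
    from boto.glacier.writer import resume_file_upload

    # Hypothetical values: the upload id and part size must come from the
    # interrupted initiate_multipart_upload call, and part_hash_map is rebuilt
    # from whatever record of already-uploaded parts the caller kept
    # ({part_index: binary tree hash}); an empty dict restarts from part 0.
    layer2 = boto.glacier.connect_to_region('us-east-1')
    vault = layer2.get_vault('my-vault')
    upload_id = 'EXAMPLE-UPLOAD-ID'
    part_size = 4 * 1024 * 1024   # must match the original upload's part size
    part_hash_map = {}

    with open('backup.tar.gz', 'rb') as fobj:
        fobj.seek(0)  # resume_file_upload reads from the start of the file
        archive_id = resume_file_upload(vault, upload_id, part_size, fobj,
                                        part_hash_map)
    print(archive_id)

Parts whose tree hash matches an entry in part_hash_map are passed to skip_part and only counted toward the final size and tree hash; mismatched or missing parts are re-uploaded via upload_part.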