author | Robie Basak <robie@justgohome.co.uk> | 2012-10-11 20:31:06 +0100 |
---|---|---|
committer | James Saryerwinnie <js@jamesls.com> | 2012-11-08 11:28:52 -0800 |
commit | 87685d9623080a727f3fac36df1aa3dd1a2a979d (patch) | |
tree | 1007cf4e26ee2448720b712312dd92d6ebc21ed5 /boto/glacier | |
parent | 9ecaf83bf9b6982d1ea9c9b135f658fe9a99503e (diff) | |
download | boto-87685d9623080a727f3fac36df1aa3dd1a2a979d.tar.gz | |
Add glacier resume_file_upload
Diffstat (limited to 'boto/glacier')
-rw-r--r-- | boto/glacier/writer.py | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py
index 73b4725e..8563c654 100644
--- a/boto/glacier/writer.py
+++ b/boto/glacier/writer.py
@@ -190,6 +190,23 @@ class _Uploader(object):
             response.read()
         self._uploaded_size += len(part_data)
 
+    def skip_part(self, part_index, part_tree_hash, part_length):
+        """Skip uploading of a part.
+
+        The final close call needs to calculate the tree hash and total size
+        of all uploaded data, so this is the mechanism for resume
+        functionality to provide it without actually uploading the data again.
+
+        :param part_index: part number where 0 is the first part
+        :param part_tree_hash: binary tree_hash of part being skipped
+        :param part_length: length of part being skipped
+
+        """
+        if self.closed:
+            raise ValueError("I/O operation on closed file")
+        self._insert_tree_hash(part_index, part_tree_hash)
+        self._uploaded_size += part_length
+
     def close(self):
         if self.closed:
             return
@@ -205,6 +222,47 @@ class _Uploader(object):
         self.closed = True
 
 
+def generate_parts_from_fobj(fobj, part_size):
+    data = fobj.read(part_size)
+    while data:
+        yield data
+        data = fobj.read(part_size)
+
+
+def resume_file_upload(vault, upload_id, part_size, fobj, part_hash_map,
+                       chunk_size=_ONE_MEGABYTE):
+    """Resume upload of a file already part-uploaded to Glacier.
+
+    The resumption of an upload where the part-uploaded section is empty is a
+    valid degenerate case that this function can handle. In this case,
+    part_hash_map should be an empty dict.
+
+    :param vault: boto.glacier.vault.Vault object.
+    :param upload_id: existing Glacier upload id of upload being resumed.
+    :param part_size: part size of existing upload.
+    :param fobj: file object containing local data to resume. This must read
+        from the start of the entire upload, not just from the point being
+        resumed. Use fobj.seek(0) to achieve this if necessary.
+    :param part_hash_map: {part_index: part_tree_hash, ...} of data already
+        uploaded. Each supplied part_tree_hash will be verified and the part
+        re-uploaded if there is a mismatch.
+    :param chunk_size: chunk size of tree hash calculation. This must be
+        1 MiB for Amazon.
+
+    """
+    uploader = _Uploader(vault, upload_id, part_size, chunk_size)
+    for part_index, part_data in enumerate(
+            generate_parts_from_fobj(fobj, part_size)):
+        part_tree_hash = tree_hash(chunk_hashes(part_data, chunk_size))
+        if (part_index not in part_hash_map or
+                part_hash_map[part_index] != part_tree_hash):
+            uploader.upload_part(part_index, part_data)
+        else:
+            uploader.skip_part(part_index, part_tree_hash, len(part_data))
+    uploader.close()
+    return uploader.archive_id
+
+
 class Writer(object):
     """
     Presents a file-like object for writing to a Amazon Glacier
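As a usage illustration (not part of this commit), the sketch below resumes an interrupted multipart upload by rebuilding `part_hash_map` from Glacier's record of already-received parts and then calling the new `resume_file_upload`. The vault name, file name, upload id, and 4 MiB part size are placeholders, and the `Layer2`/`Vault.layer1.list_parts` plumbing and the `'Parts'`/`'RangeInBytes'`/`'SHA256TreeHash'` response fields are assumptions about the surrounding boto and Glacier APIs rather than anything introduced by this patch.

```python
# Hypothetical resume of an interrupted Glacier multipart upload.
import binascii

from boto.glacier.layer2 import Layer2
from boto.glacier.writer import resume_file_upload

ONE_MB = 1024 * 1024

layer2 = Layer2()                      # assumes default AWS credentials
vault = layer2.get_vault('my-vault')   # placeholder vault name
upload_id = 'EXAMPLE-UPLOAD-ID'        # saved when the upload was started
part_size = 4 * ONE_MB                 # must match the original upload

# Rebuild {part_index: binary part tree hash} from the parts Glacier has
# already received (assumed ListParts response layout; a single page of
# results is assumed for brevity).
part_hash_map = {}
parts = vault.layer1.list_parts(vault.name, upload_id)
for part in parts['Parts']:
    start = int(part['RangeInBytes'].partition('-')[0])
    part_hash_map[start // part_size] = binascii.unhexlify(
        part['SHA256TreeHash'])

with open('backup.tar.gz', 'rb') as fobj:   # placeholder local file
    # resume_file_upload reads the whole file from the beginning; matching
    # parts are skipped via skip_part, mismatched or missing parts are
    # (re-)uploaded.
    archive_id = resume_file_upload(vault, upload_id, part_size, fobj,
                                    part_hash_map)
print(archive_id)
```

If no record of uploaded parts is available, passing an empty dict for `part_hash_map` simply re-uploads every part, which the docstring calls out as a valid degenerate case.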