diff options
author | Mitch Garnaat <mitch@garnaat.com> | 2012-09-19 21:22:22 -0700 |
---|---|---|
committer | Mitch Garnaat <mitch@garnaat.com> | 2012-09-19 21:22:22 -0700 |
commit | 7073e7a4cc63501d7a3aa7cf84b17a4621c1417a (patch) | |
tree | d0bb1911a2e33b6419d8f2d7a0a84acfa4abffdb | |
parent | 6c3cdeca0db5280f4c24560450eb9936d20e6eea (diff) | |
parent | 54201dc35c481207c8dc7042f59057d2a7e28d82 (diff) | |
download | boto-7073e7a4cc63501d7a3aa7cf84b17a4621c1417a.tar.gz |
Merge branch 'develop' (2.6.0)
-rw-r--r-- | boto/glacier/exceptions.py | 14 |
-rw-r--r-- | boto/glacier/job.py | 63 |
2 files changed, 72 insertions, 5 deletions
diff --git a/boto/glacier/exceptions.py b/boto/glacier/exceptions.py
index 3942da68..e5258805 100644
--- a/boto/glacier/exceptions.py
+++ b/boto/glacier/exceptions.py
@@ -22,6 +22,7 @@
 #
 import json
 
+
 class UnexpectedHTTPResponseError(Exception):
     def __init__(self, expected_responses, response):
         self.status = response.status
@@ -31,11 +32,10 @@ class UnexpectedHTTPResponseError(Exception):
             body = json.loads(self.body)
             self.code = body["code"]
             msg = 'Expected %s, got ' % expected_responses
-            msg += '(%d, code=%s, message=%s)' % (expected_responses,
-                                                  response.status,
+            msg += '(%d, code=%s, message=%s)' % (response.status,
                                                   self.code,
                                                   body["message"])
-        except:
+        except Exception:
             msg = 'Expected %s, got (%d, %s)' % (expected_responses,
                                                  response.status,
                                                  self.body)
@@ -44,3 +44,11 @@ class UnexpectedHTTPResponseError(Exception):
 
 class UploadArchiveError(Exception):
     pass
+
+
+class DownloadArchiveError(Exception):
+    pass
+
+
+class TreeHashDoesNotMatchError(DownloadArchiveError):
+    pass
diff --git a/boto/glacier/job.py b/boto/glacier/job.py
index cdb53bc6..62f0758d 100644
--- a/boto/glacier/job.py
+++ b/boto/glacier/job.py
@@ -20,12 +20,17 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 #
-import urllib
-import json
+import math
+import socket
+
+from .exceptions import TreeHashDoesNotMatchError, DownloadArchiveError
+from .writer import bytes_to_hex, chunk_hashes, tree_hash
 
 
 class Job(object):
 
+    DefaultPartSize = 4 * 1024 * 1024
+
     ResponseDataElements = (('Action', 'action', None),
                             ('ArchiveId', 'archive_id', None),
                             ('ArchiveSizeInBytes', 'archive_size', 0),
@@ -75,3 +80,57 @@ class Job(object):
         return self.vault.layer1.get_job_output(self.vault.name,
                                                 self.id,
                                                 byte_range)
+
+    def download_to_file(self, filename, chunk_size=DefaultPartSize,
+                         verify_hashes=True, retry_exceptions=(socket.error,)):
+        """Download an archive to a file.
+
+        :type filename: str
+        :param filename: The name of the file where the archive
+            contents will be saved.
+
+        :type chunk_size: int
+        :param chunk_size: The chunk size to use when downloading
+            the archive.
+
+        :type verify_hashes: bool
+        :param verify_hashes: Indicates whether or not to verify
+            the tree hashes for each downloaded chunk.
+
+        """
+        num_chunks = int(math.ceil(self.archive_size / float(chunk_size)))
+        with open(filename, 'wb') as output_file:
+            self._download_to_fileob(output_file, num_chunks, chunk_size,
+                                     verify_hashes, retry_exceptions)
+
+    def _download_to_fileob(self, fileobj, num_chunks, chunk_size, verify_hashes,
+                            retry_exceptions):
+        for i in xrange(num_chunks):
+            byte_range = ((i * chunk_size), ((i + 1) * chunk_size) - 1)
+            data, expected_tree_hash = self._download_byte_range(
+                byte_range, retry_exceptions)
+            if verify_hashes:
+                actual_tree_hash = bytes_to_hex(tree_hash(chunk_hashes(data)))
+                if expected_tree_hash != actual_tree_hash:
+                    raise TreeHashDoesNotMatchError(
+                        "The calculated tree hash %s does not match the "
+                        "expected tree hash %s for the byte range %s" % (
+                            actual_tree_hash, expected_tree_hash, byte_range))
+            fileobj.write(data)
+
+    def _download_byte_range(self, byte_range, retry_exceptions):
+        # You can occasionally get socket.errors when downloading
+        # chunks from Glacier, so each chunk can be retried up
+        # to 5 times.
+        for _ in xrange(5):
+            try:
+                response = self.get_output(byte_range)
+                data = response.read()
+                expected_tree_hash = response['TreeHash']
+                return data, expected_tree_hash
+            except retry_exceptions, e:
+                continue
+        else:
+            raise DownloadArchiveError("There was an error downloading"
+                                       "byte range %s: %s" % (byte_range,
+                                                              e))