Diffstat (limited to 'boto/s3/key.py')
-rw-r--r--  boto/s3/key.py  224
1 file changed, 143 insertions, 81 deletions
diff --git a/boto/s3/key.py b/boto/s3/key.py
index 71013a54..fa9bc61f 100644
--- a/boto/s3/key.py
+++ b/boto/s3/key.py
@@ -21,6 +21,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+import errno
 import mimetypes
 import os
 import re
@@ -32,6 +33,7 @@ import math
 import urllib
 import boto.utils
 from boto.exception import BotoClientError
+from boto.exception import StorageDataError
 from boto.provider import Provider
 from boto.s3.keyfile import KeyFile
 from boto.s3.user import User
@@ -109,8 +111,6 @@ class Key(object):
         self.last_modified = None
         self.owner = None
         self.storage_class = 'STANDARD'
-        self.md5 = None
-        self.base64md5 = None
         self.path = None
         self.resp = None
         self.mode = None
@@ -126,6 +126,7 @@
         # restored object.
         self.ongoing_restore = None
         self.expiry_date = None
+        self.local_hashes = {}
 
     def __repr__(self):
         if self.bucket:
@@ -133,18 +134,6 @@
         else:
             return '<Key: None,%s>' % self.name
 
-    def __getattr__(self, name):
-        if name == 'key':
-            return self.name
-        else:
-            raise AttributeError
-
-    def __setattr__(self, name, value):
-        if name == 'key':
-            self.__dict__['name'] = value
-        else:
-            self.__dict__[name] = value
-
     def __iter__(self):
         return self
 
@@ -155,6 +144,38 @@
             provider = self.bucket.connection.provider
         return provider
 
+    @property
+    def key(self):
+        return self.name
+
+    @key.setter
+    def key(self, value):
+        self.name = value
+
+    @property
+    def md5(self):
+        if 'md5' in self.local_hashes and self.local_hashes['md5']:
+            return binascii.b2a_hex(self.local_hashes['md5'])
+
+    @md5.setter
+    def md5(self, value):
+        if value:
+            self.local_hashes['md5'] = binascii.a2b_hex(value)
+        elif 'md5' in self.local_hashes:
+            self.local_hashes.pop('md5', None)
+
+    @property
+    def base64md5(self):
+        if 'md5' in self.local_hashes and self.local_hashes['md5']:
+            return binascii.b2a_base64(self.local_hashes['md5']).rstrip('\n')
+
+    @base64md5.setter
+    def base64md5(self, value):
+        if value:
+            self.local_hashes['md5'] = binascii.a2b_base64(value)
+        elif 'md5' in self.local_hashes:
+            del self.local_hashes['md5']
+
     def get_md5_from_hexdigest(self, md5_hexdigest):
         """
         A utility function to create the 2-tuple (md5hexdigest, base64md5)
@@ -169,7 +190,8 @@
     def handle_encryption_headers(self, resp):
         provider = self.bucket.connection.provider
         if provider.server_side_encryption_header:
-            self.encrypted = resp.getheader(provider.server_side_encryption_header, None)
+            self.encrypted = resp.getheader(
+                provider.server_side_encryption_header, None)
         else:
             self.encrypted = None
 
@@ -202,6 +224,13 @@
             elif key == 'expiry-date':
                 self.expiry_date = val
 
+    def handle_addl_headers(self, headers):
+        """
+        Used by Key subclasses to do additional, provider-specific
+        processing of response headers. No-op for this base class.
+        """
+        pass
+
     def open_read(self, headers=None, query_args='',
                   override_num_retries=None, response_headers=None):
         """
@@ -265,6 +294,7 @@
                         self.content_disposition = value
             self.handle_version_headers(self.resp)
             self.handle_encryption_headers(self.resp)
+            self.handle_addl_headers(self.resp.getheaders())
 
     def open_write(self, headers=None, override_num_retries=None):
         """
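The new key/md5/base64md5 properties above are thin wrappers over the raw
digest bytes kept in local_hashes. A stdlib-only sketch of the round-trip
they perform (the sample data is hypothetical, not part of the patch):

    import binascii
    from hashlib import md5

    digest = md5('hello world').digest()   # raw 16-byte MD5 digest
    local_hashes = {'md5': digest}         # as stored on the Key instance

    # What the getters return:
    hex_md5 = binascii.b2a_hex(local_hashes['md5'])                  # hex form
    b64_md5 = binascii.b2a_base64(local_hashes['md5']).rstrip('\n')  # base64 form

    # What the setters store: both encodings decode to the same raw digest.
    assert binascii.a2b_hex(hex_md5) == digest
    assert binascii.a2b_base64(b64_md5) == digest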
@@ -295,8 +325,23 @@
 
     closed = False
 
-    def close(self):
-        if self.resp:
+    def close(self, fast=False):
+        """
+        Close this key.
+
+        :type fast: bool
+        :param fast: True if you want the connection to be closed without first
+        reading the content. This should only be used in cases where subsequent
+        calls don't need to return the content from the open HTTP connection.
+        Note: As explained at
+        http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse,
+        callers must read the whole response before sending a new request to the
+        server. Calling Key.close(fast=True) and making a subsequent request to
+        the server will work because boto will get an httplib exception and
+        close/reopen the connection.
+
+        """
+        if self.resp and not fast:
             self.resp.read()
         self.resp = None
         self.mode = None
@@ -513,7 +558,7 @@
             raise self.provider.storage_response_error(
                 response.status, response.reason, response.read())
 
-    def set_redirect(self, redirect_location):
+    def set_redirect(self, redirect_location, headers=None):
         """Configure this key to redirect to another location.
 
         When the bucket associated with this key is accessed from the website
@@ -524,7 +569,12 @@
         :param redirect_location: The location to redirect.
 
         """
-        headers = {'x-amz-website-redirect-location': redirect_location}
+        if headers is None:
+            headers = {}
+        else:
+            headers = headers.copy()
+
+        headers['x-amz-website-redirect-location'] = redirect_location
         response = self.bucket.connection.make_request('PUT', self.bucket.name,
                                                        self.name, headers)
         if response.status == 200:
@@ -626,20 +676,12 @@
            point point at the offset from which you wish to upload.
            ie. if uploading the full file, it should point at the
            start of the file. Normally when a file is opened for
-           reading, the fp will point at the first byte.  See the
+           reading, the fp will point at the first byte. See the
            bytes parameter below for more info.
 
         :type headers: dict
         :param headers: The headers to pass along with the PUT request
 
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the upload. The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted to S3 and
-            the second representing the size of the to be transmitted
-            object.
-
         :type num_cb: int
         :param num_cb: (optional) If a callback is specified with the
            cb parameter this parameter determines the granularity of
@@ -648,6 +690,13 @@
            transfer. Providing a negative integer will cause your
            callback to be called with each buffer read.
 
+        :type query_args: string
+        :param query_args: (optional) Arguments to pass in the query string.
+
+        :type chunked_transfer: boolean
+        :param chunked_transfer: (optional) If true, we use chunked
+            Transfer-Encoding.
+
         :type size: int
         :param size: (optional) The Maximum number of bytes to read
            from the file pointer (fp). This is useful when uploading
@@ -656,6 +705,13 @@
            the default behaviour is to read all bytes from the file
           pointer. Less bytes may be available.
         """
+        self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
+                                 query_args=query_args,
+                                 chunked_transfer=chunked_transfer, size=size)
+
+    def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10,
+                            query_args=None, chunked_transfer=False, size=None,
+                            hash_algs=None):
         provider = self.bucket.connection.provider
         try:
             spos = fp.tell()
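A usage sketch for the close(fast=True) change above; the connection,
bucket, and key names are hypothetical and assume ordinary boto credentials:

    import boto

    conn = boto.connect_s3()
    key = conn.get_bucket('example-bucket').get_key('example-object')

    key.open_read()
    head = key.read(4096)   # consume only the first 4 KB of the body
    key.close(fast=True)    # skip draining the rest of the response
    # Per the docstring: the next request on this connection raises an
    # httplib exception, which boto handles by closing and reopening it.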
@@ -663,6 +719,12 @@
             spos = None
             self.read_from_stream = False
 
+        # If hash_algs is unset and the MD5 hasn't already been computed,
+        # default to an MD5 hash_alg to hash the data on-the-fly.
+        if hash_algs is None and not self.md5:
+            hash_algs = {'md5': md5}
+        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
+
         def sender(http_conn, method, path, data, headers):
             # This function is called repeatedly for temporary retries
             # so we must be sure the file pointer is pointing at the
@@ -681,19 +743,13 @@
                 http_conn.putheader(key, headers[key])
             http_conn.endheaders()
 
-            # Calculate all MD5 checksums on the fly, if not already computed
-            if not self.base64md5:
-                m = md5()
-            else:
-                m = None
-
             save_debug = self.bucket.connection.debug
             self.bucket.connection.debug = 0
-            # If the debuglevel < 3 we don't want to show connection
+            # If the debuglevel < 4 we don't want to show connection
             # payload, so turn off HTTP connection-level debug output (to
             # be restored below).
             # Use the getattr approach to allow this to work in AppEngine.
-            if getattr(http_conn, 'debuglevel', 0) < 3:
+            if getattr(http_conn, 'debuglevel', 0) < 4:
                 http_conn.set_debuglevel(0)
 
             data_len = 0
@@ -709,7 +765,8 @@
                     # of data transferred, except when we know size.
                     cb_count = (1024 * 1024) / self.BufferSize
                 elif num_cb > 1:
-                    cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
+                    cb_count = int(
+                        math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
                 elif num_cb < 0:
                     cb_count = -1
                 else:
@@ -734,8 +791,8 @@
                     http_conn.send('\r\n')
                 else:
                     http_conn.send(chunk)
-                if m:
-                    m.update(chunk)
+                for alg in digesters:
+                    digesters[alg].update(chunk)
                 if bytes_togo:
                     bytes_togo -= chunk_len
                     if bytes_togo <= 0:
@@ -752,10 +809,8 @@
 
             self.size = data_len
 
-            if m:
-                # Use the chunked trailer for the digest
-                hd = m.hexdigest()
-                self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd)
+            for alg in digesters:
+                self.local_hashes[alg] = digesters[alg].digest()
 
             if chunked_transfer:
                 http_conn.send('0\r\n')
@@ -765,10 +820,10 @@
             if cb and (cb_count <= 1 or i > 0) and data_len > 0:
                 cb(data_len, cb_size)
 
-            response = http_conn.getresponse()
-            body = response.read()
             http_conn.set_debuglevel(save_debug)
             self.bucket.connection.debug = save_debug
+            response = http_conn.getresponse()
+            body = response.read()
             if ((response.status == 500 or response.status == 503 or
                     response.getheader('location')) and not chunked_transfer):
                 # we'll try again.
@@ -826,6 +881,7 @@
                                                  sender=sender,
                                                  query_args=query_args)
         self.handle_version_headers(resp, force=True)
+        self.handle_addl_headers(resp.getheaders())
 
     def compute_md5(self, fp, size=None):
         """
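The digesters dict above generalizes the old single-MD5 accumulator: one
digest object per named algorithm, each updated with every chunk sent. The
pattern in isolation (the chunks and the extra sha256 entry are illustrative):

    from hashlib import md5, sha256

    hash_algs = {'md5': md5, 'sha256': sha256}
    digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})

    for chunk in ['hello ', 'world']:   # stands in for the upload loop
        for alg in digesters:
            digesters[alg].update(chunk)

    # As in the sender above, raw digests end up keyed by algorithm name:
    local_hashes = dict((alg, digesters[alg].digest()) for alg in digesters)
    print(digesters['md5'].hexdigest())  # 5eb63bbbe01eeed093cb22bb8f5acdc3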
@@ -838,14 +894,9 @@
         :param size: (optional) The Maximum number of bytes to read
             from the file pointer (fp). This is useful when uploading
             a file in multiple parts where the file is being split
-            inplace into different parts. Less bytes may be available.
-
-        :rtype: tuple
-        :return: A tuple containing the hex digest version of the MD5
-            hash as the first element and the base64 encoded version
-            of the plain digest as the second element.
+            in place into different parts. Less bytes may be available.
         """
-        tup = compute_md5(fp, size=size)
+        hex_digest, b64_digest, data_size = compute_md5(fp, size=size)
         # Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
         # The internal implementation of compute_md5() needs to return the
         # data size but we don't want to return that value to the external
         # caller because it changes the class interface (i.e. it might
         # break some code) so we consume the third tuple value here and
         # return the remainder of the tuple to the caller, thereby preserving
         # the existing interface.
-        self.size = tup[2]
-        return tup[0:2]
+        self.size = data_size
+        return (hex_digest, b64_digest)
 
     def set_contents_from_stream(self, fp, headers=None, replace=True,
                                  cb=None, num_cb=10, policy=None,
@@ -1179,14 +1230,15 @@
         :param encrypt_key: If True, the new copy of the object will
             be encrypted on the server-side by S3 and will be stored
             in an encrypted form while at rest in S3.
+
+        :rtype: int
+        :return: The number of bytes written to the key.
         """
-        fp = open(filename, 'rb')
-        try:
-            self.set_contents_from_file(fp, headers, replace, cb, num_cb,
-                                        policy, md5, reduced_redundancy,
-                                        encrypt_key=encrypt_key)
-        finally:
-            fp.close()
+        with open(filename, 'rb') as fp:
+            return self.set_contents_from_file(fp, headers, replace, cb,
+                                               num_cb, policy, md5,
+                                               reduced_redundancy,
+                                               encrypt_key=encrypt_key)
 
     def set_contents_from_string(self, s, headers=None, replace=True,
                                  cb=None, num_cb=10, policy=None, md5=None,
@@ -1297,11 +1349,12 @@ class Key(object):
                                 torrent=torrent, version_id=version_id,
                                 override_num_retries=override_num_retries,
                                 response_headers=response_headers,
+                                hash_algs=None,
                                 query_args=None)
 
     def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
                            torrent=False, version_id=None,
                            override_num_retries=None,
-                           response_headers=None, query_args=None):
+                           response_headers=None, hash_algs=None, query_args=None):
         if headers is None:
             headers = {}
         save_debug = self.bucket.connection.debug
@@ -1311,9 +1364,11 @@
         query_args = query_args or []
         if torrent:
             query_args.append('torrent')
-            m = None
-        else:
-            m = md5()
+
+        if hash_algs is None and not torrent:
+            hash_algs = {'md5': md5}
+        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
+
         # If a version_id is passed in, use that. If not, check to see
         # if the Key object has an explicit version_id and, if so, use that.
         # Otherwise, don't pass a version_id query param.
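The unpacking in compute_md5() above makes the 3-tuple contract of
boto.utils.compute_md5() explicit while Key.compute_md5() keeps returning the
historical 2-tuple. A quick sketch of the underlying call (assuming the
boto.utils signature; the sample input is arbitrary):

    from StringIO import StringIO

    from boto.utils import compute_md5

    fp = StringIO('hello world')
    hex_digest, b64_digest, data_size = compute_md5(fp)
    print(hex_digest)   # 5eb63bbbe01eeed093cb22bb8f5acdc3
    print(data_size)    # 11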
@@ -1323,7 +1378,8 @@
             query_args.append('versionId=%s' % version_id)
         if response_headers:
             for key in response_headers:
-                query_args.append('%s=%s' % (key, urllib.quote(response_headers[key])))
+                query_args.append('%s=%s' % (
+                    key, urllib.quote(response_headers[key])))
         query_args = '&'.join(query_args)
         self.open('r', headers, query_args=query_args,
                   override_num_retries=override_num_retries)
@@ -1346,22 +1402,27 @@
                 cb_count = 0
             i = 0
             cb(data_len, cb_size)
-        for bytes in self:
-            fp.write(bytes)
-            data_len += len(bytes)
-            if m:
-                m.update(bytes)
-            if cb:
-                if cb_size > 0 and data_len >= cb_size:
-                    break
-                i += 1
-                if i == cb_count or cb_count == -1:
-                    cb(data_len, cb_size)
-                    i = 0
+        try:
+            for bytes in self:
+                fp.write(bytes)
+                data_len += len(bytes)
+                for alg in digesters:
+                    digesters[alg].update(bytes)
+                if cb:
+                    if cb_size > 0 and data_len >= cb_size:
+                        break
+                    i += 1
+                    if i == cb_count or cb_count == -1:
+                        cb(data_len, cb_size)
+                        i = 0
+        except IOError as e:
+            if e.errno == errno.ENOSPC:
+                raise StorageDataError('Out of space for destination file '
+                                       '%s' % fp.name)
         if cb and (cb_count <= 1 or i > 0) and data_len > 0:
             cb(data_len, cb_size)
-        if m:
-            self.md5 = m.hexdigest()
+        for alg in digesters:
+            self.local_hashes[alg] = digesters[alg].digest()
         if self.size is None and not torrent and "Range" not in headers:
            self.size = data_len
         self.close()
@@ -1670,7 +1731,8 @@
                 rewritten_metadata[rewritten_h] = metadata[h]
             metadata = rewritten_metadata
         src_bucket.copy_key(self.name, self.bucket.name, self.name,
-                            metadata=metadata, preserve_acl=preserve_acl)
+                            metadata=metadata, preserve_acl=preserve_acl,
+                            headers=headers)
 
     def restore(self, days, headers=None):
         """Restore an object from an archive.
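The errno check in the @@ -1346 hunk turns a full destination disk into a
typed StorageDataError. The same idea as a standalone sketch; unlike the hunk
as written, it re-raises IOErrors that are not ENOSPC instead of continuing:

    import errno

    from boto.exception import StorageDataError

    def write_all(fp, chunks):
        # Mirrors the download loop's error handling in _get_file_internal.
        try:
            for chunk in chunks:
                fp.write(chunk)
        except IOError as e:
            if e.errno == errno.ENOSPC:
                raise StorageDataError('Out of space for destination file '
                                       '%s' % fp.name)
            raise  # not in the patch: surface other I/O errors unchanged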