Diffstat (limited to 'boto/s3/key.py')
-rw-r--r--  boto/s3/key.py  224
1 file changed, 143 insertions, 81 deletions
diff --git a/boto/s3/key.py b/boto/s3/key.py
index 71013a54..fa9bc61f 100644
--- a/boto/s3/key.py
+++ b/boto/s3/key.py
@@ -21,6 +21,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.
 
+import errno
 import mimetypes
 import os
 import re
@@ -32,6 +33,7 @@ import math
 import urllib
 import boto.utils
 from boto.exception import BotoClientError
+from boto.exception import StorageDataError
 from boto.provider import Provider
 from boto.s3.keyfile import KeyFile
 from boto.s3.user import User
@@ -109,8 +111,6 @@ class Key(object):
         self.last_modified = None
         self.owner = None
         self.storage_class = 'STANDARD'
-        self.md5 = None
-        self.base64md5 = None
         self.path = None
         self.resp = None
         self.mode = None
@@ -126,6 +126,7 @@
         # restored object.
         self.ongoing_restore = None
         self.expiry_date = None
+        self.local_hashes = {}
 
     def __repr__(self):
         if self.bucket:
@@ -133,18 +134,6 @@
         else:
             return '<Key: None,%s>' % self.name
 
-    def __getattr__(self, name):
-        if name == 'key':
-            return self.name
-        else:
-            raise AttributeError
-
-    def __setattr__(self, name, value):
-        if name == 'key':
-            self.__dict__['name'] = value
-        else:
-            self.__dict__[name] = value
-
     def __iter__(self):
         return self
 
@@ -155,6 +144,38 @@
             provider = self.bucket.connection.provider
         return provider
 
+    @property
+    def key(self):
+        return self.name
+
+    @key.setter
+    def key(self, value):
+        self.name = value
+
+    @property
+    def md5(self):
+        if 'md5' in self.local_hashes and self.local_hashes['md5']:
+            return binascii.b2a_hex(self.local_hashes['md5'])
+
+    @md5.setter
+    def md5(self, value):
+        if value:
+            self.local_hashes['md5'] = binascii.a2b_hex(value)
+        elif 'md5' in self.local_hashes:
+            self.local_hashes.pop('md5', None)
+
+    @property
+    def base64md5(self):
+        if 'md5' in self.local_hashes and self.local_hashes['md5']:
+            return binascii.b2a_base64(self.local_hashes['md5']).rstrip('\n')
+
+    @base64md5.setter
+    def base64md5(self, value):
+        if value:
+            self.local_hashes['md5'] = binascii.a2b_base64(value)
+        elif 'md5' in self.local_hashes:
+            del self.local_hashes['md5']
+
     def get_md5_from_hexdigest(self, md5_hexdigest):
         """
         A utility function to create the 2-tuple (md5hexdigest, base64md5)
@@ -169,7 +190,8 @@
     def handle_encryption_headers(self, resp):
         provider = self.bucket.connection.provider
         if provider.server_side_encryption_header:
-            self.encrypted = resp.getheader(provider.server_side_encryption_header, None)
+            self.encrypted = resp.getheader(
+                provider.server_side_encryption_header, None)
         else:
             self.encrypted = None
 
@@ -202,6 +224,13 @@
             elif key == 'expiry-date':
                 self.expiry_date = val
 
+    def handle_addl_headers(self, headers):
+        """
+        Used by Key subclasses to do additional, provider-specific
+        processing of response headers. No-op for this base class.
+        """
+        pass
+
     def open_read(self, headers=None, query_args='',
                   override_num_retries=None, response_headers=None):
         """
@@ -265,6 +294,7 @@
                         self.content_disposition = value
             self.handle_version_headers(self.resp)
             self.handle_encryption_headers(self.resp)
+            self.handle_addl_headers(self.resp.getheaders())
 
     def open_write(self, headers=None, override_num_retries=None):
         """
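The new key/md5/base64md5 properties above are thin wrappers over the raw
digest bytes kept in local_hashes. A stdlib-only sketch of the round-trip
they perform (the sample data is hypothetical, not part of the patch):

    import binascii
    from hashlib import md5

    digest = md5('hello world').digest()   # raw 16-byte MD5 digest
    local_hashes = {'md5': digest}         # as stored on the Key instance

    # What the getters return:
    hex_md5 = binascii.b2a_hex(local_hashes['md5'])                  # hex form
    b64_md5 = binascii.b2a_base64(local_hashes['md5']).rstrip('\n')  # base64 form

    # What the setters store: both encodings decode to the same raw digest.
    assert binascii.a2b_hex(hex_md5) == digest
    assert binascii.a2b_base64(b64_md5) == digest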
@@ -295,8 +325,23 @@
 
     closed = False
 
-    def close(self):
-        if self.resp:
+    def close(self, fast=False):
+        """
+        Close this key.
+
+        :type fast: bool
+        :param fast: True if you want the connection to be closed without first
+        reading the content. This should only be used in cases where subsequent
+        calls don't need to return the content from the open HTTP connection.
+        Note: As explained at
+        http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse,
+        callers must read the whole response before sending a new request to the
+        server. Calling Key.close(fast=True) and making a subsequent request to
+        the server will work because boto will get an httplib exception and
+        close/reopen the connection.
+
+        """
+        if self.resp and not fast:
             self.resp.read()
         self.resp = None
         self.mode = None
@@ -513,7 +558,7 @@
             raise self.provider.storage_response_error(
                 response.status, response.reason, response.read())
 
-    def set_redirect(self, redirect_location):
+    def set_redirect(self, redirect_location, headers=None):
         """Configure this key to redirect to another location.
 
         When the bucket associated with this key is accessed from the website
@@ -524,7 +569,12 @@
         :param redirect_location: The location to redirect.
 
         """
-        headers = {'x-amz-website-redirect-location': redirect_location}
+        if headers is None:
+            headers = {}
+        else:
+            headers = headers.copy()
+
+        headers['x-amz-website-redirect-location'] = redirect_location
         response = self.bucket.connection.make_request('PUT', self.bucket.name,
                                                        self.name, headers)
         if response.status == 200:
@@ -626,20 +676,12 @@
            point point at the offset from which you wish to upload.
            ie. if uploading the full file, it should point at the
            start of the file. Normally when a file is opened for
-           reading, the fp will point at the first byte.  See the
+           reading, the fp will point at the first byte. See the
            bytes parameter below for more info.
 
         :type headers: dict
         :param headers: The headers to pass along with the PUT request
 
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the upload. The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted to S3 and
-            the second representing the size of the to be transmitted
-            object.
-
         :type num_cb: int
         :param num_cb: (optional) If a callback is specified with the
            cb parameter this parameter determines the granularity of
@@ -648,6 +690,13 @@
            transfer. Providing a negative integer will cause your
            callback to be called with each buffer read.
 
+        :type query_args: string
+        :param query_args: (optional) Arguments to pass in the query string.
+
+        :type chunked_transfer: boolean
+        :param chunked_transfer: (optional) If true, we use chunked
+            Transfer-Encoding.
+
         :type size: int
         :param size: (optional) The Maximum number of bytes to read
            from the file pointer (fp). This is useful when uploading
@@ -656,6 +705,13 @@
            the default behaviour is to read all bytes from the file
           pointer. Less bytes may be available.
         """
+        self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
+                                 query_args=query_args,
+                                 chunked_transfer=chunked_transfer, size=size)
+
+    def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10,
+                            query_args=None, chunked_transfer=False, size=None,
+                            hash_algs=None):
         provider = self.bucket.connection.provider
         try:
             spos = fp.tell()
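A usage sketch for the close(fast=True) change above; the connection,
bucket, and key names are hypothetical and assume ordinary boto credentials:

    import boto

    conn = boto.connect_s3()
    key = conn.get_bucket('example-bucket').get_key('example-object')

    key.open_read()
    head = key.read(4096)   # consume only the first 4 KB of the body
    key.close(fast=True)    # skip draining the rest of the response
    # Per the docstring: the next request on this connection raises an
    # httplib exception, which boto handles by closing and reopening it.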
@@ -663,6 +719,12 @@
             spos = None
             self.read_from_stream = False
 
+        # If hash_algs is unset and the MD5 hasn't already been computed,
+        # default to an MD5 hash_alg to hash the data on-the-fly.
+        if hash_algs is None and not self.md5:
+            hash_algs = {'md5': md5}
+        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
+
         def sender(http_conn, method, path, data, headers):
             # This function is called repeatedly for temporary retries
             # so we must be sure the file pointer is pointing at the
@@ -681,19 +743,13 @@
                 http_conn.putheader(key, headers[key])
             http_conn.endheaders()
 
-            # Calculate all MD5 checksums on the fly, if not already computed
-            if not self.base64md5:
-                m = md5()
-            else:
-                m = None
-
             save_debug = self.bucket.connection.debug
             self.bucket.connection.debug = 0
-            # If the debuglevel < 3 we don't want to show connection
+            # If the debuglevel < 4 we don't want to show connection
             # payload, so turn off HTTP connection-level debug output (to
             # be restored below).
             # Use the getattr approach to allow this to work in AppEngine.
-            if getattr(http_conn, 'debuglevel', 0) < 3:
+            if getattr(http_conn, 'debuglevel', 0) < 4:
                 http_conn.set_debuglevel(0)
 
             data_len = 0
@@ -709,7 +765,8 @@
                     # of data transferred, except when we know size.
                     cb_count = (1024 * 1024) / self.BufferSize
                 elif num_cb > 1:
-                    cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
+                    cb_count = int(
+                        math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
                 elif num_cb < 0:
                     cb_count = -1
                 else:
@@ -734,8 +791,8 @@
                     http_conn.send('\r\n')
                 else:
                     http_conn.send(chunk)
-                if m:
-                    m.update(chunk)
+                for alg in digesters:
+                    digesters[alg].update(chunk)
                 if bytes_togo:
                     bytes_togo -= chunk_len
                     if bytes_togo <= 0:
@@ -752,10 +809,8 @@
 
             self.size = data_len
 
-            if m:
-                # Use the chunked trailer for the digest
-                hd = m.hexdigest()
-                self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd)
+            for alg in digesters:
+                self.local_hashes[alg] = digesters[alg].digest()
 
             if chunked_transfer:
                 http_conn.send('0\r\n')
@@ -765,10 +820,10 @@
             if cb and (cb_count <= 1 or i > 0) and data_len > 0:
                 cb(data_len, cb_size)
 
-            response = http_conn.getresponse()
-            body = response.read()
             http_conn.set_debuglevel(save_debug)
             self.bucket.connection.debug = save_debug
+            response = http_conn.getresponse()
+            body = response.read()
             if ((response.status == 500 or response.status == 503 or
                     response.getheader('location')) and not chunked_transfer):
                 # we'll try again.
@@ -826,6 +881,7 @@
                                                  sender=sender,
                                                  query_args=query_args)
         self.handle_version_headers(resp, force=True)
+        self.handle_addl_headers(resp.getheaders())
 
     def compute_md5(self, fp, size=None):
         """
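The digesters dict above generalizes the old single-MD5 accumulator: one
digest object per named algorithm, each updated with every chunk sent. The
pattern in isolation (the chunks and the extra sha256 entry are illustrative):

    from hashlib import md5, sha256

    hash_algs = {'md5': md5, 'sha256': sha256}
    digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})

    for chunk in ['hello ', 'world']:   # stands in for the upload loop
        for alg in digesters:
            digesters[alg].update(chunk)

    # As in the sender above, raw digests end up keyed by algorithm name:
    local_hashes = dict((alg, digesters[alg].digest()) for alg in digesters)
    print(digesters['md5'].hexdigest())  # 5eb63bbbe01eeed093cb22bb8f5acdc3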
@@ -838,14 +894,9 @@
         :param size: (optional) The Maximum number of bytes to read
             from the file pointer (fp). This is useful when uploading
             a file in multiple parts where the file is being split
-            inplace into different parts. Less bytes may be available.
-
-        :rtype: tuple
-        :return: A tuple containing the hex digest version of the MD5
-            hash as the first element and the base64 encoded version
-            of the plain digest as the second element.
+            in place into different parts. Less bytes may be available.
         """
-        tup = compute_md5(fp, size=size)
+        hex_digest, b64_digest, data_size = compute_md5(fp, size=size)
         # Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
         # The internal implementation of compute_md5() needs to return the
         # data size but we don't want to return that value to the external
         # caller because it changes the class interface (i.e. it might
         # break some code) so we consume the third tuple value here and
         # return the remainder of the tuple to the caller, thereby preserving
         # the existing interface.
-        self.size = tup[2]
-        return tup[0:2]
+        self.size = data_size
+        return (hex_digest, b64_digest)
 
     def set_contents_from_stream(self, fp, headers=None, replace=True,
                                  cb=None, num_cb=10, policy=None,
@@ -1179,14 +1230,15 @@
         :param encrypt_key: If True, the new copy of the object will
             be encrypted on the server-side by S3 and will be stored
             in an encrypted form while at rest in S3.
+
+        :rtype: int
+        :return: The number of bytes written to the key.
         """
-        fp = open(filename, 'rb')
-        try:
-            self.set_contents_from_file(fp, headers, replace, cb, num_cb,
-                                        policy, md5, reduced_redundancy,
-                                        encrypt_key=encrypt_key)
-        finally:
-            fp.close()
+        with open(filename, 'rb') as fp:
+            return self.set_contents_from_file(fp, headers, replace, cb,
+                                               num_cb, policy, md5,
+                                               reduced_redundancy,
+                                               encrypt_key=encrypt_key)
 
     def set_contents_from_string(self, s, headers=None, replace=True,
                                  cb=None, num_cb=10, policy=None, md5=None,
@@ -1297,11 +1349,12 @@ class Key(object):
                                 torrent=torrent, version_id=version_id,
                                 override_num_retries=override_num_retries,
                                 response_headers=response_headers,
+                                hash_algs=None,
                                 query_args=None)
 
     def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
                            torrent=False, version_id=None,
                            override_num_retries=None,
-                           response_headers=None, query_args=None):
+                           response_headers=None, hash_algs=None, query_args=None):
         if headers is None:
             headers = {}
         save_debug = self.bucket.connection.debug
@@ -1311,9 +1364,11 @@
         query_args = query_args or []
         if torrent:
             query_args.append('torrent')
-            m = None
-        else:
-            m = md5()
+
+        if hash_algs is None and not torrent:
+            hash_algs = {'md5': md5}
+        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
+
         # If a version_id is passed in, use that. If not, check to see
         # if the Key object has an explicit version_id and, if so, use that.
         # Otherwise, don't pass a version_id query param.
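The unpacking in compute_md5() above makes the 3-tuple contract of
boto.utils.compute_md5() explicit while Key.compute_md5() keeps returning the
historical 2-tuple. A quick sketch of the underlying call (assuming the
boto.utils signature; the sample input is arbitrary):

    from StringIO import StringIO

    from boto.utils import compute_md5

    fp = StringIO('hello world')
    hex_digest, b64_digest, data_size = compute_md5(fp)
    print(hex_digest)   # 5eb63bbbe01eeed093cb22bb8f5acdc3
    print(data_size)    # 11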
@@ -1323,7 +1378,8 @@
             query_args.append('versionId=%s' % version_id)
         if response_headers:
             for key in response_headers:
-                query_args.append('%s=%s' % (key, urllib.quote(response_headers[key])))
+                query_args.append('%s=%s' % (
+                    key, urllib.quote(response_headers[key])))
         query_args = '&'.join(query_args)
         self.open('r', headers, query_args=query_args,
                   override_num_retries=override_num_retries)
@@ -1346,22 +1402,27 @@
                 cb_count = 0
             i = 0
             cb(data_len, cb_size)
-        for bytes in self:
-            fp.write(bytes)
-            data_len += len(bytes)
-            if m:
-                m.update(bytes)
-            if cb:
-                if cb_size > 0 and data_len >= cb_size:
-                    break
-                i += 1
-                if i == cb_count or cb_count == -1:
-                    cb(data_len, cb_size)
-                    i = 0
+        try:
+            for bytes in self:
+                fp.write(bytes)
+                data_len += len(bytes)
+                for alg in digesters:
+                    digesters[alg].update(bytes)
+                if cb:
+                    if cb_size > 0 and data_len >= cb_size:
+                        break
+                    i += 1
+                    if i == cb_count or cb_count == -1:
+                        cb(data_len, cb_size)
+                        i = 0
+        except IOError as e:
+            if e.errno == errno.ENOSPC:
+                raise StorageDataError('Out of space for destination file '
+                                       '%s' % fp.name)
         if cb and (cb_count <= 1 or i > 0) and data_len > 0:
             cb(data_len, cb_size)
-        if m:
-            self.md5 = m.hexdigest()
+        for alg in digesters:
+            self.local_hashes[alg] = digesters[alg].digest()
         if self.size is None and not torrent and "Range" not in headers:
            self.size = data_len
         self.close()
@@ -1670,7 +1731,8 @@
                 rewritten_metadata[rewritten_h] = metadata[h]
             metadata = rewritten_metadata
         src_bucket.copy_key(self.name, self.bucket.name, self.name,
-                            metadata=metadata, preserve_acl=preserve_acl)
+                            metadata=metadata, preserve_acl=preserve_acl,
+                            headers=headers)
 
     def restore(self, days, headers=None):
         """Restore an object from an archive.
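The errno check in the @@ -1346 hunk turns a full destination disk into a
typed StorageDataError. The same idea as a standalone sketch; unlike the hunk
as written, it re-raises IOErrors that are not ENOSPC instead of continuing:

    import errno

    from boto.exception import StorageDataError

    def write_all(fp, chunks):
        # Mirrors the download loop's error handling in _get_file_internal.
        try:
            for chunk in chunks:
                fp.write(chunk)
        except IOError as e:
            if e.errno == errno.ENOSPC:
                raise StorageDataError('Out of space for destination file '
                                       '%s' % fp.name)
            raise  # not in the patch: surface other I/O errors unchanged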