Diffstat (limited to 'boto/s3/key.py')
-rw-r--r--  boto/s3/key.py  224
1 file changed, 143 insertions(+), 81 deletions(-)
diff --git a/boto/s3/key.py b/boto/s3/key.py
index 71013a54..fa9bc61f 100644
--- a/boto/s3/key.py
+++ b/boto/s3/key.py
@@ -21,6 +21,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
+import errno
import mimetypes
import os
import re
@@ -32,6 +33,7 @@ import math
import urllib
import boto.utils
from boto.exception import BotoClientError
+from boto.exception import StorageDataError
from boto.provider import Provider
from boto.s3.keyfile import KeyFile
from boto.s3.user import User
@@ -109,8 +111,6 @@ class Key(object):
self.last_modified = None
self.owner = None
self.storage_class = 'STANDARD'
- self.md5 = None
- self.base64md5 = None
self.path = None
self.resp = None
self.mode = None
@@ -126,6 +126,7 @@ class Key(object):
# restored object.
self.ongoing_restore = None
self.expiry_date = None
+ self.local_hashes = {}
def __repr__(self):
if self.bucket:
@@ -133,18 +134,6 @@ class Key(object):
else:
return '<Key: None,%s>' % self.name
-    def __getattr__(self, name):
-        if name == 'key':
-            return self.name
-        else:
-            raise AttributeError
-
-    def __setattr__(self, name, value):
-        if name == 'key':
-            self.__dict__['name'] = value
-        else:
-            self.__dict__[name] = value
-
def __iter__(self):
return self
@@ -155,6 +144,38 @@ class Key(object):
provider = self.bucket.connection.provider
return provider
+    @property
+    def key(self):
+        return self.name
+
+    @key.setter
+    def key(self, value):
+        self.name = value
+
+    @property
+    def md5(self):
+        if 'md5' in self.local_hashes and self.local_hashes['md5']:
+            return binascii.b2a_hex(self.local_hashes['md5'])
+
+    @md5.setter
+    def md5(self, value):
+        if value:
+            self.local_hashes['md5'] = binascii.a2b_hex(value)
+        elif 'md5' in self.local_hashes:
+            self.local_hashes.pop('md5', None)
+
+    @property
+    def base64md5(self):
+        if 'md5' in self.local_hashes and self.local_hashes['md5']:
+            return binascii.b2a_base64(self.local_hashes['md5']).rstrip('\n')
+
+    @base64md5.setter
+    def base64md5(self, value):
+        if value:
+            self.local_hashes['md5'] = binascii.a2b_base64(value)
+        elif 'md5' in self.local_hashes:
+            del self.local_hashes['md5']
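
Taken together, key becomes a plain alias for name, while md5 and base64md5 are two views of a single binary digest kept in local_hashes['md5']. A minimal sketch of the round-trip these properties implement (Python 2, using only the modules the patch itself relies on):

    import binascii
    from hashlib import md5

    digest = md5(b'hello world').digest()               # raw 16-byte digest
    hex_md5 = binascii.b2a_hex(digest)                  # what Key.md5 returns
    b64_md5 = binascii.b2a_base64(digest).rstrip('\n')  # what Key.base64md5 returns

    # Setting either property stores the same binary value back:
    assert binascii.a2b_hex(hex_md5) == binascii.a2b_base64(b64_md5) == digest
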
def get_md5_from_hexdigest(self, md5_hexdigest):
"""
A utility function to create the 2-tuple (md5hexdigest, base64md5)
@@ -169,7 +190,8 @@ class Key(object):
    def handle_encryption_headers(self, resp):
        provider = self.bucket.connection.provider
        if provider.server_side_encryption_header:
-            self.encrypted = resp.getheader(provider.server_side_encryption_header, None)
+            self.encrypted = resp.getheader(
+                provider.server_side_encryption_header, None)
        else:
            self.encrypted = None
@@ -202,6 +224,13 @@ class Key(object):
elif key == 'expiry-date':
self.expiry_date = val
+    def handle_addl_headers(self, headers):
+        """
+        Used by Key subclasses to do additional, provider-specific
+        processing of response headers. No-op for this base class.
+        """
+        pass
+
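
The hook receives the raw (name, value) pairs from resp.getheaders(), so a provider-specific Key subclass can pick out headers the base class does not know about. A hypothetical sketch (the subclass and header name are illustrative, not part of this patch):

    class ProviderKey(Key):
        def handle_addl_headers(self, headers):
            # headers is a list of (name, value) tuples from getheaders()
            for name, value in headers:
                if name.lower() == 'x-example-generation':  # hypothetical header
                    self.generation = value
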
def open_read(self, headers=None, query_args='',
override_num_retries=None, response_headers=None):
"""
@@ -265,6 +294,7 @@ class Key(object):
self.content_disposition = value
self.handle_version_headers(self.resp)
self.handle_encryption_headers(self.resp)
+ self.handle_addl_headers(self.resp.getheaders())
def open_write(self, headers=None, override_num_retries=None):
"""
@@ -295,8 +325,23 @@ class Key(object):
closed = False
-    def close(self):
-        if self.resp:
+    def close(self, fast=False):
+        """
+        Close this key.
+
+        :type fast: bool
+        :param fast: True if you want the connection to be closed without
+            first reading the content. This should only be used in cases
+            where subsequent calls don't need to return the content from
+            the open HTTP connection. Note: As explained at
+            http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse,
+            callers must read the whole response before sending a new
+            request to the server. Calling Key.close(fast=True) and making
+            a subsequent request to the server will work because boto will
+            get an httplib exception and close/reopen the connection.
+        """
+        if self.resp and not fast:
            self.resp.read()
        self.resp = None
        self.mode = None
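
A usage sketch for the new flag, assuming key is a boto Key for an existing object: read a prefix of a large object, then abandon the rest of the body instead of draining it.

    key.open_read()
    head = key.resp.read(8192)   # peek at the first 8 KB only
    key.close(fast=True)         # skip reading the remaining body
    # As the docstring notes, boto's next request on this connection will
    # hit an httplib error and transparently close/reopen the connection.
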
@@ -513,7 +558,7 @@ class Key(object):
raise self.provider.storage_response_error(
response.status, response.reason, response.read())
- def set_redirect(self, redirect_location):
+ def set_redirect(self, redirect_location, headers=None):
"""Configure this key to redirect to another location.
When the bucket associated with this key is accessed from the website
@@ -524,7 +569,12 @@ class Key(object):
:param redirect_location: The location to redirect.
"""
-        headers = {'x-amz-website-redirect-location': redirect_location}
+        if headers is None:
+            headers = {}
+        else:
+            headers = headers.copy()
+
+        headers['x-amz-website-redirect-location'] = redirect_location
        response = self.bucket.connection.make_request('PUT', self.bucket.name,
                                                       self.name, headers)
if response.status == 200:
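
Because the method now copies the dict before mutating it, a caller-owned headers dict stays untouched. A usage sketch, assuming key belongs to a website-enabled bucket:

    extra = {'x-amz-acl': 'public-read'}     # a real S3 request header
    key.set_redirect('/new-location.html', headers=extra)
    assert 'x-amz-website-redirect-location' not in extra  # caller's dict unchanged
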
@@ -626,20 +676,12 @@ class Key(object):
point at the offset from which you wish to upload.
ie. if uploading the full file, it should point at the
start of the file. Normally when a file is opened for
- reading, the fp will point at the first byte. See the
+ reading, the fp will point at the first byte. See the
bytes parameter below for more info.
:type headers: dict
:param headers: The headers to pass along with the PUT request
- :type cb: function
- :param cb: a callback function that will be called to report
- progress on the upload. The callback should accept two
- integer parameters, the first representing the number of
- bytes that have been successfully transmitted to S3 and
- the second representing the size of the to be transmitted
- object.
-
:type num_cb: int
:param num_cb: (optional) If a callback is specified with the
cb parameter this parameter determines the granularity of
@@ -648,6 +690,13 @@ class Key(object):
transfer. Providing a negative integer will cause your
callback to be called with each buffer read.
+ :type query_args: string
+ :param query_args: (optional) Arguments to pass in the query string.
+
+ :type chunked_transfer: boolean
+ :param chunked_transfer: (optional) If true, we use chunked
+ Transfer-Encoding.
+
:type size: int
:param size: (optional) The maximum number of bytes to read
from the file pointer (fp). This is useful when uploading
@@ -656,6 +705,13 @@ class Key(object):
the default behaviour is to read all bytes from the file
pointer. Fewer bytes may be available.
"""
+        self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
+                                 query_args=query_args,
+                                 chunked_transfer=chunked_transfer, size=size)
+
+    def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10,
+                            query_args=None, chunked_transfer=False, size=None,
+                            hash_algs=None):
provider = self.bucket.connection.provider
try:
spos = fp.tell()
@@ -663,6 +719,12 @@ class Key(object):
spos = None
self.read_from_stream = False
+        # If hash_algs is unset and the MD5 hasn't already been computed,
+        # default to an MD5 hash_alg to hash the data on-the-fly.
+        if hash_algs is None and not self.md5:
+            hash_algs = {'md5': md5}
+        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
+
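
The digesters dict generalizes the old single-MD5 accumulator: every requested algorithm gets a live hash object that is fed each chunk, and the raw digests land in local_hashes when the transfer finishes. The pattern in isolation (sha256 here is an illustrative extra algorithm, not a patch default):

    import hashlib

    hash_algs = {'md5': hashlib.md5, 'sha256': hashlib.sha256}
    digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})

    for chunk in (b'first chunk', b'second chunk'):
        for alg in digesters:
            digesters[alg].update(chunk)

    local_hashes = dict((alg, digesters[alg].digest()) for alg in digesters)
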
def sender(http_conn, method, path, data, headers):
# This function is called repeatedly for temporary retries
# so we must be sure the file pointer is pointing at the
@@ -681,19 +743,13 @@ class Key(object):
                http_conn.putheader(key, headers[key])
            http_conn.endheaders()

-            # Calculate all MD5 checksums on the fly, if not already computed
-            if not self.base64md5:
-                m = md5()
-            else:
-                m = None
-
            save_debug = self.bucket.connection.debug
            self.bucket.connection.debug = 0
-            # If the debuglevel < 3 we don't want to show connection
+            # If the debuglevel < 4 we don't want to show connection
            # payload, so turn off HTTP connection-level debug output (to
            # be restored below).
            # Use the getattr approach to allow this to work in AppEngine.
-            if getattr(http_conn, 'debuglevel', 0) < 3:
+            if getattr(http_conn, 'debuglevel', 0) < 4:
                http_conn.set_debuglevel(0)

            data_len = 0
@@ -709,7 +765,8 @@ class Key(object):
                    # of data transferred, except when we know size.
                    cb_count = (1024 * 1024) / self.BufferSize
                elif num_cb > 1:
-                    cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
+                    cb_count = int(
+                        math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
                elif num_cb < 0:
                    cb_count = -1
                else:
@@ -734,8 +791,8 @@ class Key(object):
                    http_conn.send('\r\n')
                else:
                    http_conn.send(chunk)
-                if m:
-                    m.update(chunk)
+                for alg in digesters:
+                    digesters[alg].update(chunk)
                if bytes_togo:
                    bytes_togo -= chunk_len
                    if bytes_togo <= 0:
@@ -752,10 +809,8 @@ class Key(object):
            self.size = data_len

-            if m:
-                # Use the chunked trailer for the digest
-                hd = m.hexdigest()
-                self.md5, self.base64md5 = self.get_md5_from_hexdigest(hd)
+            for alg in digesters:
+                self.local_hashes[alg] = digesters[alg].digest()

            if chunked_transfer:
                http_conn.send('0\r\n')
@@ -765,10 +820,10 @@ class Key(object):
            if cb and (cb_count <= 1 or i > 0) and data_len > 0:
                cb(data_len, cb_size)

            http_conn.set_debuglevel(save_debug)
            self.bucket.connection.debug = save_debug
+            response = http_conn.getresponse()
+            body = response.read()
-            response = http_conn.getresponse()
-            body = response.read()
            if ((response.status == 500 or response.status == 503 or
                    response.getheader('location')) and not chunked_transfer):
                # we'll try again.
@@ -826,6 +881,7 @@ class Key(object):
sender=sender,
query_args=query_args)
self.handle_version_headers(resp, force=True)
+ self.handle_addl_headers(resp.getheaders())
def compute_md5(self, fp, size=None):
"""
@@ -838,14 +894,9 @@ class Key(object):
:param size: (optional) The maximum number of bytes to read
from the file pointer (fp). This is useful when uploading
a file in multiple parts where the file is being split
- inplace into different parts. Less bytes may be available.
-
- :rtype: tuple
- :return: A tuple containing the hex digest version of the MD5
- hash as the first element and the base64 encoded version
- of the plain digest as the second element.
+    in place into different parts. Fewer bytes may be available.
"""
- tup = compute_md5(fp, size=size)
+ hex_digest, b64_digest, data_size = compute_md5(fp, size=size)
# Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
# The internal implementation of compute_md5() needs to return the
# data size but we don't want to return that value to the external
@@ -853,8 +904,8 @@ class Key(object):
# break some code) so we consume the third tuple value here and
# return the remainder of the tuple to the caller, thereby preserving
# the existing interface.
- self.size = tup[2]
- return tup[0:2]
+ self.size = data_size
+ return (hex_digest, b64_digest)
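
The explicit unpacking documents the contract: the module-level compute_md5() yields (hex digest, base64 digest, size), while the method keeps its historical 2-tuple return and records the size on the key as a side effect. A usage sketch (the file name is illustrative):

    with open('/tmp/payload.bin', 'rb') as fp:
        hex_md5, b64_md5 = key.compute_md5(fp)
    print(key.size)   # data size, stored as a side effect
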
def set_contents_from_stream(self, fp, headers=None, replace=True,
cb=None, num_cb=10, policy=None,
@@ -1179,14 +1230,15 @@ class Key(object):
:param encrypt_key: If True, the new copy of the object
will be encrypted on the server-side by S3 and will be
stored in an encrypted form while at rest in S3.
+
+ :rtype: int
+ :return: The number of bytes written to the key.
"""
-        fp = open(filename, 'rb')
-        try:
-            self.set_contents_from_file(fp, headers, replace, cb, num_cb,
-                                        policy, md5, reduced_redundancy,
-                                        encrypt_key=encrypt_key)
-        finally:
-            fp.close()
+        with open(filename, 'rb') as fp:
+            return self.set_contents_from_file(fp, headers, replace, cb,
+                                               num_cb, policy, md5,
+                                               reduced_redundancy,
+                                               encrypt_key=encrypt_key)
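
Besides guaranteeing the file handle is closed on any exception path, the rewrite propagates the byte count documented above, so a caller can now write (file name illustrative):

    n = key.set_contents_from_filename('/tmp/report.csv')
    print('%d bytes uploaded' % n)
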
def set_contents_from_string(self, s, headers=None, replace=True,
cb=None, num_cb=10, policy=None, md5=None,
@@ -1297,11 +1349,12 @@ class Key(object):
torrent=torrent, version_id=version_id,
override_num_retries=override_num_retries,
response_headers=response_headers,
+ hash_algs=None,
query_args=None)
def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
torrent=False, version_id=None, override_num_retries=None,
- response_headers=None, query_args=None):
+ response_headers=None, hash_algs=None, query_args=None):
if headers is None:
headers = {}
save_debug = self.bucket.connection.debug
@@ -1311,9 +1364,11 @@ class Key(object):
        query_args = query_args or []
        if torrent:
            query_args.append('torrent')
-            m = None
-        else:
-            m = md5()
+
+        if hash_algs is None and not torrent:
+            hash_algs = {'md5': md5}
+        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
+
# If a version_id is passed in, use that. If not, check to see
# if the Key object has an explicit version_id and, if so, use that.
# Otherwise, don't pass a version_id query param.
@@ -1323,7 +1378,8 @@ class Key(object):
            query_args.append('versionId=%s' % version_id)
        if response_headers:
            for key in response_headers:
-                query_args.append('%s=%s' % (key, urllib.quote(response_headers[key])))
+                query_args.append('%s=%s' % (
+                    key, urllib.quote(response_headers[key])))
        query_args = '&'.join(query_args)
        self.open('r', headers, query_args=query_args,
                  override_num_retries=override_num_retries)
@@ -1346,22 +1402,27 @@ class Key(object):
cb_count = 0
i = 0
cb(data_len, cb_size)
- for bytes in self:
- fp.write(bytes)
- data_len += len(bytes)
- if m:
- m.update(bytes)
- if cb:
- if cb_size > 0 and data_len >= cb_size:
- break
- i += 1
- if i == cb_count or cb_count == -1:
- cb(data_len, cb_size)
- i = 0
+        try:
+            for bytes in self:
+                fp.write(bytes)
+                data_len += len(bytes)
+                for alg in digesters:
+                    digesters[alg].update(bytes)
+                if cb:
+                    if cb_size > 0 and data_len >= cb_size:
+                        break
+                    i += 1
+                    if i == cb_count or cb_count == -1:
+                        cb(data_len, cb_size)
+                        i = 0
+        except IOError as e:
+            if e.errno == errno.ENOSPC:
+                raise StorageDataError('Out of space for destination file '
+                                       '%s' % fp.name)
+            raise
        if cb and (cb_count <= 1 or i > 0) and data_len > 0:
            cb(data_len, cb_size)
-        if m:
-            self.md5 = m.hexdigest()
+        for alg in digesters:
+            self.local_hashes[alg] = digesters[alg].digest()
if self.size is None and not torrent and "Range" not in headers:
self.size = data_len
self.close()
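
For callers, a full destination disk now surfaces as a boto StorageDataError rather than a bare IOError (any other IOError is re-raised unchanged). A handling sketch (the path is illustrative):

    from boto.exception import StorageDataError

    try:
        key.get_contents_to_filename('/mnt/small-disk/obj.bin')
    except StorageDataError as e:
        print('out of space on destination: %s' % e)
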
@@ -1670,7 +1731,8 @@ class Key(object):
rewritten_metadata[rewritten_h] = metadata[h]
metadata = rewritten_metadata
src_bucket.copy_key(self.name, self.bucket.name, self.name,
- metadata=metadata, preserve_acl=preserve_acl)
+ metadata=metadata, preserve_acl=preserve_acl,
+ headers=headers)
def restore(self, days, headers=None):
"""Restore an object from an archive.