summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Saryerwinnie <js@jamesls.com>2012-09-06 10:55:18 -0700
committerJames Saryerwinnie <js@jamesls.com>2012-09-06 15:20:41 -0700
commitf117db58ae25a788a0ab522e89986cea6bded31a (patch)
treecdf0e3aa2940eb92dab9449b4ad6f05977b8382b
parentf1b007e6c8f29096e5e20266c66df873b777dc4e (diff)
downloadboto-f117db58ae25a788a0ab522e89986cea6bded31a.tar.gz
Reduce memory usage for chunk_hashes
On a 180MB file this reduced total memory usage by approximately 40%. It was also marginally faster (though not by much). I've also added the start of a unit test suite for the writer module, including some basic unit tests for the chunk_hashes function.
-rw-r--r--boto/glacier/writer.py20
-rw-r--r--tests/unit/glacier/test_writer.py26
2 files changed, 37 insertions, 9 deletions
diff --git a/boto/glacier/writer.py b/boto/glacier/writer.py
index aca94e76..b57723c7 100644
--- a/boto/glacier/writer.py
+++ b/boto/glacier/writer.py
@@ -28,15 +28,17 @@ import math
import json
-def chunk_hashes(str):
- """
- Break up the byte-string into 1MB chunks and return sha256 hashes
- for each.
- """
- chunk = 1024 * 1024
- chunk_count = int(math.ceil(len(str) / float(chunk)))
- chunks = [str[i * chunk:(i + 1) * chunk] for i in range(chunk_count)]
- return [hashlib.sha256(x).digest() for x in chunks]
+_ONE_MEGABYTE = 1024 * 1024
+
+
+def chunk_hashes(bytestring, chunk_size=_ONE_MEGABYTE):
+ chunk_count = int(math.ceil(len(bytestring) / float(chunk_size)))
+ hashes = []
+ for i in xrange(chunk_count):
+ start = i * chunk_size
+ end = (i + 1) * chunk_size
+ hashes.append(hashlib.sha256(bytestring[start:end]).digest())
+ return hashes
def tree_hash(fo):
diff --git a/tests/unit/glacier/test_writer.py b/tests/unit/glacier/test_writer.py
new file mode 100644
index 00000000..216429fd
--- /dev/null
+++ b/tests/unit/glacier/test_writer.py
@@ -0,0 +1,26 @@
+from hashlib import sha256
+
+from tests.unit import unittest
+import mock
+
+from boto.glacier.writer import Writer, chunk_hashes
+
+
+class TestChunking(unittest.TestCase):
+ def test_chunk_hashes_exact(self):
+ chunks = chunk_hashes('a' * (2 * 1024 * 1024))
+ self.assertEqual(len(chunks), 2)
+ self.assertEqual(chunks[0], sha256('a' * 1024 * 1024).digest())
+
+ def test_chunks_with_leftovers(self):
+ bytestring = 'a' * (2 * 1024 * 1024 + 20)
+ chunks = chunk_hashes(bytestring)
+ self.assertEqual(len(chunks), 3)
+ self.assertEqual(chunks[0], sha256('a' * 1024 * 1024).digest())
+ self.assertEqual(chunks[1], sha256('a' * 1024 * 1024).digest())
+ self.assertEqual(chunks[2], sha256('a' * 20).digest())
+
+ def test_less_than_one_chunk(self):
+ chunks = chunk_hashes('aaaa')
+ self.assertEqual(len(chunks), 1)
+ self.assertEqual(chunks[0], sha256('aaaa').digest())