summary refs log tree commit diff
diff options
context:
space:
mode:
author Matthew Somerville <matthew-github@dracos.co.uk> 2015-01-29 07:59:41 +0000
committer Tim Graham <timograham@gmail.com> 2015-02-04 13:04:00 -0500
commit caa3562d5bec1196502352a715a539bdb0f73c2d (patch)
tree 3fda98a627ab4100f8e03c510e3bfc9e3e3f21a4
parent 2730dad0d7c425f33f1ecc6cec01fdbf1cdd8376 (diff)
download django-caa3562d5bec1196502352a715a539bdb0f73c2d.tar.gz
Fixed #24242 -- Improved efficiency of utils.text.compress_sequence()
The function no longer flushes zfile after each write, because doing so can make the gzipped streamed content larger than the original content: each flush adds a 5- or 6-byte type 0 (stored) block. Without the flush, buf.read() may return nothing, so the function now yields only when buf.read() returns some data. Testing shows that without the flush() call the buffer is flushed roughly every 17k, and the output compresses the same as if the content had been compressed as a single string.
-rw-r--r-- django/utils/text.py 7
-rw-r--r-- tests/utils_tests/test_text.py 11
2 files changed, 16 insertions, 2 deletions
diff --git a/django/utils/text.py b/django/utils/text.py
index 88c38f4b2f..ad549c349d 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -302,6 +302,8 @@ class StreamingBuffer(object):
self.vals.append(val)
def read(self):
+ if not self.vals:
+ return b''
ret = b''.join(self.vals)
self.vals = []
return ret
@@ -321,8 +323,9 @@ def compress_sequence(sequence):
yield buf.read()
for item in sequence:
zfile.write(item)
- zfile.flush()
- yield buf.read()
+ data = buf.read()
+ if data:
+ yield data
zfile.close()
yield buf.read()
diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py
index aa18534688..d8675fa8c5 100644
--- a/tests/utils_tests/test_text.py
+++ b/tests/utils_tests/test_text.py
@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
+import json
+
from django.test import SimpleTestCase
from django.utils import six, text
from django.utils.encoding import force_text
@@ -192,3 +194,12 @@ class TestUtilsText(SimpleTestCase):
def test_get_valid_filename(self):
filename = "^&'@{}[],$=!-#()%+~_123.txt"
self.assertEqual(text.get_valid_filename(filename), "-_123.txt")
+
+ def test_compress_sequence(self):
+ data = [{'key': i} for i in range(10)]
+ seq = list(json.JSONEncoder().iterencode(data))
+ seq = [s.encode('utf-8') for s in seq]
+ actual_length = len(b''.join(seq))
+ out = text.compress_sequence(seq)
+ compressed_length = len(b''.join(out))
+ self.assertTrue(compressed_length < actual_length)