
Fixed #24242 -- Improved efficiency of utils.text.compress_sequence()

The function no longer flushes zfile after each write, as doing so can
lead to the gzipped streamed content being larger than the original
content; each flush adds a 5- or 6-byte type 0 (stored) block. With the
flush removed, buf.read() may return nothing, so we only yield when the
read returns data. Testing shows that without the flush() the buffer is
flushed every 17KB or so and the result compresses the same as if the
whole string had been compressed at once.
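
A quick way to observe the overhead outside Django (a standalone sketch
using only the stdlib gzip module; the chunk payload and sizes are
illustrative, not taken from the commit):

    import gzip
    import io

    chunks = [('{"key": %d}' % i).encode('ascii') for i in range(1000)]

    # Flushing after every write forces zlib to emit a sync block each
    # time, inflating the output.
    flushed = io.BytesIO()
    with gzip.GzipFile(mode='wb', fileobj=flushed) as zf:
        for chunk in chunks:
            zf.write(chunk)
            zf.flush()

    # Without the per-write flush, zlib buffers and compresses normally.
    buffered = io.BytesIO()
    with gzip.GzipFile(mode='wb', fileobj=buffered) as zf:
        for chunk in chunks:
            zf.write(chunk)

    print(len(b''.join(chunks)), len(flushed.getvalue()), len(buffered.getvalue()))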
Matthew Somerville 2015-01-29 07:59:41 +00:00 committed by Tim Graham
parent 2730dad0d7
commit caa3562d5b
2 changed files with 16 additions and 2 deletions

django/utils/text.py

@@ -302,6 +302,8 @@ class StreamingBuffer(object):
         self.vals.append(val)
 
     def read(self):
+        if not self.vals:
+            return b''
         ret = b''.join(self.vals)
         self.vals = []
         return ret
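
For context, the whole helper after this change looks roughly as follows
(a sketch of the class in django/utils/text.py; __init__ and write are
reproduced from the surrounding code, not shown in this hunk):

    class StreamingBuffer(object):
        def __init__(self):
            self.vals = []

        def write(self, val):
            self.vals.append(val)

        def read(self):
            # Nothing written since the last read: skip the join/reset
            # and signal "no data" to the caller.
            if not self.vals:
                return b''
            ret = b''.join(self.vals)
            self.vals = []
            return ret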
@@ -321,8 +323,9 @@ def compress_sequence(sequence):
     yield buf.read()
     for item in sequence:
         zfile.write(item)
-        zfile.flush()
-        yield buf.read()
+        data = buf.read()
+        if data:
+            yield data
     zfile.close()
     yield buf.read()
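
After the patch, compress_sequence() still yields the gzip header up
front and the final block after close(), but intermediate chunks appear
only when zlib actually flushes its internal buffer. A minimal usage
sketch (input sizes are illustrative):

    from django.utils.text import compress_sequence

    seq = [('x' * 100).encode('ascii') for _ in range(1000)]
    chunks = list(compress_sequence(seq))
    # Far fewer chunks than input items: no empty yields, and zlib only
    # flushes every ~17KB of input.
    print(len(chunks), sum(len(c) for c in chunks))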

tests/utils_tests/test_text.py

@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
+import json
+
 from django.test import SimpleTestCase
 from django.utils import six, text
 from django.utils.encoding import force_text
@@ -192,3 +194,12 @@ class TestUtilsText(SimpleTestCase):
     def test_get_valid_filename(self):
         filename = "^&'@{}[],$=!-#()%+~_123.txt"
         self.assertEqual(text.get_valid_filename(filename), "-_123.txt")
+
+    def test_compress_sequence(self):
+        data = [{'key': i} for i in range(10)]
+        seq = list(json.JSONEncoder().iterencode(data))
+        seq = [s.encode('utf-8') for s in seq]
+        actual_length = len(b''.join(seq))
+        out = text.compress_sequence(seq)
+        compressed_length = len(b''.join(out))
+        self.assertTrue(compressed_length < actual_length)
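
The new test only asserts that the output is smaller than the input; a
round-trip check (an illustrative extension, not part of the commit)
also confirms the concatenated chunks remain a valid gzip stream:

    import gzip
    import io
    import json

    from django.utils import text

    data = [{'key': i} for i in range(10)]
    seq = [s.encode('utf-8') for s in json.JSONEncoder().iterencode(data)]
    compressed = b''.join(text.compress_sequence(seq))
    # Decompressing must reproduce the original byte stream exactly.
    assert gzip.GzipFile(fileobj=io.BytesIO(compressed)).read() == b''.join(seq)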