From 2a55301f9fe1c3b62ad4e79c3109bec77b57b317 Mon Sep 17 00:00:00 2001
From: Matthew Somerville <matthew-github@dracos.co.uk>
Date: Thu, 29 Jan 2015 07:59:41 +0000
Subject: [PATCH] [1.8.x] Fixed #24242 -- Improved efficiency of
 utils.text.compress_sequence()

The function no longer flushes zfile after each write, as doing so can
make the gzipped streamed content larger than the original content:
each flush adds a 5- or 6-byte type 0 block. Without the flush,
buf.read() may return nothing, so a chunk is only yielded when it
contains some data. Testing shows that without the flush() the buffer
is flushed every 17k or so, and the output compresses the same as if
the input had been compressed as a whole string.

Backport of caa3562d5bec1196502352a715a539bdb0f73c2d from master
---
 django/utils/text.py           |  7 +++++--
 tests/utils_tests/test_text.py | 12 +++++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/django/utils/text.py b/django/utils/text.py
index 37bcd3150e..8446b0a238 100644
--- a/django/utils/text.py
+++ b/django/utils/text.py
@@ -304,6 +304,8 @@ class StreamingBuffer(object):
         self.vals.append(val)
 
     def read(self):
+        if not self.vals:
+            return b''
         ret = b''.join(self.vals)
         self.vals = []
         return ret
@@ -323,8 +325,9 @@ def compress_sequence(sequence):
     yield buf.read()
     for item in sequence:
         zfile.write(item)
-        zfile.flush()
-        yield buf.read()
+        data = buf.read()
+        if data:
+            yield data
     zfile.close()
     yield buf.read()
 
diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py
index 142963893e..084645da27 100644
--- a/tests/utils_tests/test_text.py
+++ b/tests/utils_tests/test_text.py
@@ -1,8 +1,9 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-from unittest import skipUnless
+import json
 import warnings
+from unittest import skipUnless
 
 from django.test import SimpleTestCase, ignore_warnings
 from django.test.utils import reset_warning_registry
@@ -198,6 +199,15 @@ class TestUtilsText(SimpleTestCase):
         filename = "^&'@{}[],$=!-#()%+~_123.txt"
         self.assertEqual(text.get_valid_filename(filename), "-_123.txt")
 
+    def test_compress_sequence(self):
+        data = [{'key': i} for i in range(10)]
+        seq = list(json.JSONEncoder().iterencode(data))
+        seq = [s.encode('utf-8') for s in seq]
+        actual_length = len(b''.join(seq))
+        out = text.compress_sequence(seq)
+        compressed_length = len(b''.join(out))
+        self.assertTrue(compressed_length < actual_length)
+
     @ignore_warnings(category=RemovedInDjango19Warning)
     def test_javascript_quote(self):
         input = "<script>alert('Hello \\xff.\n Welcome\there\r');</script>"