mirror of
				https://github.com/django/django.git
				synced 2025-10-25 14:46:09 +00:00 
			
		
		
		
	Fixed #33865 -- Optimized LimitedStream wrapper.

The current implementation of LimitedStream is slow because .read()
performs an extra copy into a buffer and .readline() performs two
extra copies. The stream being wrapped is already typically a BytesIO
object so this is unnecessary.

This implementation has largely been untouched for 12 years and,
inspired by a simpler implementation in werkzeug, it was possible to
achieve the following performance improvement:

LimitedStream.read() (single line):
  Mean +- std dev: [bench_limitedstream-main] 286 ns +- 6 ns
  -> [bench_limitedstream-patch] 227 ns +- 6 ns: 1.26x faster
LimitedStream.readline() (single line):
  Mean +- std dev: [bench_limitedstream-main] 507 ns +- 11 ns
  -> [bench_limitedstream-patch] 232 ns +- 8 ns: 2.18x faster
LimitedStream.read(8192) (single line):
  Mean +- std dev: [bench_limitedstream-main] 360 ns +- 8 ns
  -> [bench_limitedstream-patch] 297 ns +- 6 ns: 1.21x faster
LimitedStream.readline(8192) (single line):
  Mean +- std dev: [bench_limitedstream-main] 602 ns +- 10 ns
  -> [bench_limitedstream-patch] 305 ns +- 10 ns: 1.98x faster
LimitedStream.read() (multiple lines):
  Mean +- std dev: [bench_limitedstream-main] 290 ns +- 5 ns
  -> [bench_limitedstream-patch] 236 ns +- 6 ns: 1.23x faster
LimitedStream.readline() (multiple lines):
  Mean +- std dev: [bench_limitedstream-main] 517 ns +- 19 ns
  -> [bench_limitedstream-patch] 239 ns +- 7 ns: 2.16x faster
LimitedStream.read(8192) (multiple lines):
  Mean +- std dev: [bench_limitedstream-main] 363 ns +- 8 ns
  -> [bench_limitedstream-patch] 311 ns +- 11 ns: 1.17x faster
LimitedStream.readline(8192) (multiple lines):
  Mean +- std dev: [bench_limitedstream-main] 601 ns +- 12 ns
  -> [bench_limitedstream-patch] 308 ns +- 7 ns: 1.95x faster

Geometric mean: 1.59x faster
This commit is contained in:
		
				
					committed by
					
						 Mariusz Felisiak
						Mariusz Felisiak
					
				
			
			
				
	
			
			
			
						parent
						
							57f5669d23
						
					
				
				
					commit
					b47f2f5b90
				
			| @@ -1,4 +1,4 @@ | ||||
| from io import BytesIO | ||||
| from io import IOBase | ||||
|  | ||||
| from django.conf import settings | ||||
| from django.core import signals | ||||
| @@ -12,56 +12,46 @@ from django.utils.regex_helper import _lazy_re_compile | ||||
| _slashes_re = _lazy_re_compile(rb"/+") | ||||
|  | ||||
|  | ||||
| class LimitedStream: | ||||
|     """Wrap another stream to disallow reading it past a number of bytes.""" | ||||
| class LimitedStream(IOBase): | ||||
|     """ | ||||
|     Wrap another stream to disallow reading it past a number of bytes. | ||||
|  | ||||
|     Based on the implementation from werkzeug.wsgi.LimitedStream | ||||
|     See https://github.com/pallets/werkzeug/blob/dbf78f67/src/werkzeug/wsgi.py#L828 | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, stream, limit): | ||||
|         self.stream = stream | ||||
|         self.remaining = limit | ||||
|         self.buffer = b"" | ||||
|         self._read = stream.read | ||||
|         self._readline = stream.readline | ||||
|         self._pos = 0 | ||||
|         self.limit = limit | ||||
|  | ||||
|     def _read_limited(self, size=None): | ||||
|         if size is None or size > self.remaining: | ||||
|             size = self.remaining | ||||
|         if size == 0: | ||||
|     def read(self, size=-1, /): | ||||
|         _pos = self._pos | ||||
|         limit = self.limit | ||||
|         if _pos >= limit: | ||||
|             return b"" | ||||
|         result = self.stream.read(size) | ||||
|         self.remaining -= len(result) | ||||
|         return result | ||||
|  | ||||
|     def read(self, size=None): | ||||
|         if size is None: | ||||
|             result = self.buffer + self._read_limited() | ||||
|             self.buffer = b"" | ||||
|         elif size < len(self.buffer): | ||||
|             result = self.buffer[:size] | ||||
|             self.buffer = self.buffer[size:] | ||||
|         else:  # size >= len(self.buffer) | ||||
|             result = self.buffer + self._read_limited(size - len(self.buffer)) | ||||
|             self.buffer = b"" | ||||
|         return result | ||||
|  | ||||
|     def readline(self, size=None): | ||||
|         while b"\n" not in self.buffer and (size is None or len(self.buffer) < size): | ||||
|             if size: | ||||
|                 # since size is not None here, len(self.buffer) < size | ||||
|                 chunk = self._read_limited(size - len(self.buffer)) | ||||
|         if size == -1 or size is None: | ||||
|             size = limit - _pos | ||||
|         else: | ||||
|                 chunk = self._read_limited() | ||||
|             if not chunk: | ||||
|                 break | ||||
|             self.buffer += chunk | ||||
|         sio = BytesIO(self.buffer) | ||||
|         if size: | ||||
|             line = sio.readline(size) | ||||
|             size = min(size, limit - _pos) | ||||
|         data = self._read(size) | ||||
|         self._pos += len(data) | ||||
|         return data | ||||
|  | ||||
|     def readline(self, size=-1, /): | ||||
|         _pos = self._pos | ||||
|         limit = self.limit | ||||
|         if _pos >= limit: | ||||
|             return b"" | ||||
|         if size == -1 or size is None: | ||||
|             size = limit - _pos | ||||
|         else: | ||||
|             line = sio.readline() | ||||
|         self.buffer = sio.read() | ||||
|             size = min(size, limit - _pos) | ||||
|         line = self._readline(size) | ||||
|         self._pos += len(line) | ||||
|         return line | ||||
|  | ||||
|     def close(self): | ||||
|         pass | ||||
|  | ||||
|  | ||||
| class WSGIRequest(HttpRequest): | ||||
|     non_picklable_attrs = HttpRequest.non_picklable_attrs | frozenset(["environ"]) | ||||
|   | ||||
| @@ -144,7 +144,7 @@ class ServerHandler(simple_server.ServerHandler): | ||||
|             self.request_handler.close_connection = True | ||||
|  | ||||
|     def close(self): | ||||
|         self.get_stdin()._read_limited() | ||||
|         self.get_stdin().read() | ||||
|         super().close() | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -523,6 +523,10 @@ Miscellaneous | ||||
|  | ||||
| .. _`redis-py`: https://pypi.org/project/redis/ | ||||
|  | ||||
| * Manually instantiated ``WSGIRequest`` objects must be provided a file-like | ||||
|   object for ``wsgi.input``. Previously, Django was more lax than the expected | ||||
|   behavior as specified by the WSGI specification. | ||||
|  | ||||
| .. _deprecated-features-4.2: | ||||
|  | ||||
| Features deprecated in 4.2 | ||||
|   | ||||
| @@ -81,7 +81,7 @@ class WSGIFileWrapperTests(TestCase): | ||||
|  | ||||
|     def test_file_wrapper_uses_sendfile(self): | ||||
|         env = {"SERVER_PROTOCOL": "HTTP/1.0"} | ||||
|         handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env) | ||||
|         handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env) | ||||
|         handler.run(wsgi_app_file_wrapper) | ||||
|         self.assertTrue(handler._used_sendfile) | ||||
|         self.assertEqual(handler.stdout.getvalue(), b"") | ||||
| @@ -89,7 +89,7 @@ class WSGIFileWrapperTests(TestCase): | ||||
|  | ||||
|     def test_file_wrapper_no_sendfile(self): | ||||
|         env = {"SERVER_PROTOCOL": "HTTP/1.0"} | ||||
|         handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env) | ||||
|         handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env) | ||||
|         handler.run(wsgi_app) | ||||
|         self.assertFalse(handler._used_sendfile) | ||||
|         self.assertEqual(handler.stdout.getvalue().splitlines()[-1], b"Hello World!") | ||||
| @@ -102,7 +102,7 @@ class WSGIFileWrapperTests(TestCase): | ||||
|         response when file_wrapper is used. | ||||
|         """ | ||||
|         env = RequestFactory().get("/fileresponse/").environ | ||||
|         handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env) | ||||
|         handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env) | ||||
|         handler.run(get_internal_wsgi_application()) | ||||
|         # Sendfile is used only when file_wrapper has been used. | ||||
|         self.assertTrue(handler._used_sendfile) | ||||
| @@ -119,7 +119,7 @@ class WSGIFileWrapperTests(TestCase): | ||||
|     @override_settings(ROOT_URLCONF="builtin_server.urls") | ||||
|     def test_file_response_call_request_finished(self): | ||||
|         env = RequestFactory().get("/fileresponse/").environ | ||||
|         handler = FileWrapperHandler(None, BytesIO(), BytesIO(), env) | ||||
|         handler = FileWrapperHandler(BytesIO(), BytesIO(), BytesIO(), env) | ||||
|         with mock.MagicMock() as signal_handler: | ||||
|             request_finished.connect(signal_handler) | ||||
|             handler.run(get_internal_wsgi_application()) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user