mirror of
				https://github.com/django/django.git
				synced 2025-10-26 07:06:08 +00:00 
			
		
		
		
	Fixed #8149 -- Made File.__iter__() support universal newlines.
The following are recognized as ending a line: the Unix end-of-line convention '\n', the Windows convention '\r\n', and the old Macintosh convention '\r'. See PEP 278 (universal newline support): http://www.python.org/dev/peps/pep-0278. Thanks to tchaumeny for the review.
This commit is contained in:
		| @@ -102,16 +102,22 @@ class File(FileProxyMixin): | ||||
|         # Iterate over this file-like object by newlines | ||||
|         buffer_ = None | ||||
|         for chunk in self.chunks(): | ||||
|             chunk_buffer = BytesIO(chunk) | ||||
|  | ||||
|             for line in chunk_buffer: | ||||
|             for line in chunk.splitlines(True): | ||||
|                 if buffer_: | ||||
|                     line = buffer_ + line | ||||
|                     if endswith_cr(buffer_) and not equals_lf(line): | ||||
|                         # Line split after a \r newline; yield buffer_. | ||||
|                         yield buffer_ | ||||
|                         # Continue with line. | ||||
|                     else: | ||||
|                         # Line either split without a newline (line | ||||
|                         # continues after buffer_) or with \r\n | ||||
|                         # newline (line == b'\n'). | ||||
|                         line = buffer_ + line | ||||
|                     # buffer_ handled, clear it. | ||||
|                     buffer_ = None | ||||
|  | ||||
|                 # If this is the end of a line, yield | ||||
|                 # otherwise, wait for the next round | ||||
|                 if line[-1:] in (b'\n', b'\r'): | ||||
|                 # If this is the end of a \n or \r\n line, yield. | ||||
|                 if endswith_lf(line): | ||||
|                     yield line | ||||
|                 else: | ||||
|                     buffer_ = line | ||||
| @@ -165,3 +171,24 @@ class ContentFile(File): | ||||
|  | ||||
|     def close(self): | ||||
|         pass | ||||
|  | ||||
|  | ||||
| def endswith_cr(line): | ||||
|     """ | ||||
|     Return True if line (a text or byte string) ends with '\r'. | ||||
|     """ | ||||
|     return line.endswith('\r' if isinstance(line, six.text_type) else b'\r') | ||||
|  | ||||
|  | ||||
| def endswith_lf(line): | ||||
|     """ | ||||
|     Return True if line (a text or byte string) ends with '\n'. | ||||
|     """ | ||||
|     return line.endswith('\n' if isinstance(line, six.text_type) else b'\n') | ||||
|  | ||||
|  | ||||
| def equals_lf(line): | ||||
|     """ | ||||
|     Return True if line (a text or byte string) equals '\n'. | ||||
|     """ | ||||
|     return line == ('\n' if isinstance(line, six.text_type) else b'\n') | ||||
|   | ||||
| @@ -53,6 +53,15 @@ The ``File`` Class | ||||
|  | ||||
|         Iterate over the file yielding one line at a time. | ||||
|  | ||||
|         .. versionchanged:: 1.8 | ||||
|  | ||||
|             ``File`` now uses `universal newlines`_. The following are | ||||
|             recognized as ending a line: the Unix end-of-line convention | ||||
|             ``'\n'``, the Windows convention ``'\r\n'``, and the old Macintosh | ||||
|             convention ``'\r'``. | ||||
|  | ||||
|             .. _universal newlines: http://www.python.org/dev/peps/pep-0278 | ||||
|  | ||||
|     .. method:: chunks([chunk_size=None]) | ||||
|  | ||||
|         Iterate over the file yielding "chunks" of a given size. ``chunk_size`` | ||||
|   | ||||
| @@ -82,10 +82,15 @@ Here are some useful attributes of ``UploadedFile``: | ||||
|         for line in uploadedfile: | ||||
|             do_something_with(line) | ||||
|  | ||||
|     However, *unlike* standard Python files, :class:`UploadedFile` only | ||||
|     understands ``\n`` (also known as "Unix-style") line endings. If you know | ||||
|     that you need to handle uploaded files with different line endings, you'll | ||||
|     need to do so in your view. | ||||
|     Lines are split using `universal newlines`_. The following are recognized | ||||
|     as ending a line: the Unix end-of-line convention ``'\n'``, the Windows | ||||
|     convention ``'\r\n'``, and the old Macintosh convention ``'\r'``. | ||||
|  | ||||
|     .. _universal newlines: http://www.python.org/dev/peps/pep-0278 | ||||
|  | ||||
|     .. versionchanged:: 1.8 | ||||
|  | ||||
|         Previously lines were only split on the Unix end-of-line ``'\n'``. | ||||
|  | ||||
| Subclasses of ``UploadedFile`` include: | ||||
|  | ||||
|   | ||||
| @@ -659,6 +659,13 @@ Miscellaneous | ||||
| * By default, :ref:`call_command <call-command>` now always skips the check | ||||
|   framework (unless you pass it ``skip_checks=False``). | ||||
|  | ||||
| * When iterating over lines, :class:`~django.core.files.File` now uses | ||||
|   `universal newlines`_. The following are recognized as ending a line: the | ||||
|   Unix end-of-line convention ``'\n'``, the Windows convention ``'\r\n'``, and | ||||
|   the old Macintosh convention ``'\r'``. | ||||
|  | ||||
|   .. _universal newlines: http://www.python.org/dev/peps/pep-0278 | ||||
|  | ||||
| .. _deprecated-features-1.8: | ||||
|  | ||||
| Features deprecated in 1.8 | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from io import BytesIO | ||||
| from io import BytesIO, StringIO | ||||
| import os | ||||
| import gzip | ||||
| import tempfile | ||||
| @@ -72,6 +72,54 @@ class FileTests(unittest.TestCase): | ||||
|         file = File(BytesIO(b'one\ntwo\nthree')) | ||||
|         self.assertEqual(list(file), [b'one\n', b'two\n', b'three']) | ||||
|  | ||||
|     def test_file_iteration_windows_newlines(self): | ||||
|         """ | ||||
|         #8149 - File objects with \r\n line endings should yield lines | ||||
|         when iterated over. | ||||
|         """ | ||||
|         f = File(BytesIO(b'one\r\ntwo\r\nthree')) | ||||
|         self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three']) | ||||
|  | ||||
|     def test_file_iteration_mac_newlines(self): | ||||
|         """ | ||||
|         #8149 - File objects with \r line endings should yield lines | ||||
|         when iterated over. | ||||
|         """ | ||||
|         f = File(BytesIO(b'one\rtwo\rthree')) | ||||
|         self.assertEqual(list(f), [b'one\r', b'two\r', b'three']) | ||||
|  | ||||
|     def test_file_iteration_mixed_newlines(self): | ||||
|         f = File(BytesIO(b'one\rtwo\nthree\r\nfour')) | ||||
|         self.assertEqual(list(f), [b'one\r', b'two\n', b'three\r\n', b'four']) | ||||
|  | ||||
|     def test_file_iteration_with_unix_newline_at_chunk_boundary(self): | ||||
|         f = File(BytesIO(b'one\ntwo\nthree')) | ||||
|         # Set chunk size to create a boundary after \n: | ||||
|         # b'one\n... | ||||
|         #        ^ | ||||
|         f.DEFAULT_CHUNK_SIZE = 4 | ||||
|         self.assertEqual(list(f), [b'one\n', b'two\n', b'three']) | ||||
|  | ||||
|     def test_file_iteration_with_windows_newline_at_chunk_boundary(self): | ||||
|         f = File(BytesIO(b'one\r\ntwo\r\nthree')) | ||||
|         # Set chunk size to create a boundary between \r and \n: | ||||
|         # b'one\r\n... | ||||
|         #        ^ | ||||
|         f.DEFAULT_CHUNK_SIZE = 4 | ||||
|         self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three']) | ||||
|  | ||||
|     def test_file_iteration_with_mac_newline_at_chunk_boundary(self): | ||||
|         f = File(BytesIO(b'one\rtwo\rthree')) | ||||
|         # Set chunk size to create a boundary after \r: | ||||
|         # b'one\r... | ||||
|         #        ^ | ||||
|         f.DEFAULT_CHUNK_SIZE = 4 | ||||
|         self.assertEqual(list(f), [b'one\r', b'two\r', b'three']) | ||||
|  | ||||
|     def test_file_iteration_with_text(self): | ||||
|         f = File(StringIO('one\ntwo\nthree')) | ||||
|         self.assertEqual(list(f), ['one\n', 'two\n', 'three']) | ||||
|  | ||||
|  | ||||
| class NoNameFileTestCase(unittest.TestCase): | ||||
|     """ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user