diff --git a/django/core/files/base.py b/django/core/files/base.py index 159a98e52f..c1c9199cb5 100644 --- a/django/core/files/base.py +++ b/django/core/files/base.py @@ -102,16 +102,22 @@ class File(FileProxyMixin): # Iterate over this file-like object by newlines buffer_ = None for chunk in self.chunks(): - chunk_buffer = BytesIO(chunk) - - for line in chunk_buffer: + for line in chunk.splitlines(True): if buffer_: - line = buffer_ + line + if endswith_cr(buffer_) and not equals_lf(line): + # Line split after a \r newline; yield buffer_. + yield buffer_ + # Continue with line. + else: + # Line either split without a newline (line + # continues after buffer_) or with \r\n + # newline (line == b'\n'). + line = buffer_ + line + # buffer_ handled, clear it. buffer_ = None - # If this is the end of a line, yield - # otherwise, wait for the next round - if line[-1:] in (b'\n', b'\r'): + # If this is the end of a \n or \r\n line, yield. + if endswith_lf(line): yield line else: buffer_ = line @@ -165,3 +171,24 @@ class ContentFile(File): def close(self): pass + + +def endswith_cr(line): + """ + Return True if line (a text or byte string) ends with '\r'. + """ + return line.endswith('\r' if isinstance(line, six.text_type) else b'\r') + + +def endswith_lf(line): + """ + Return True if line (a text or byte string) ends with '\n'. + """ + return line.endswith('\n' if isinstance(line, six.text_type) else b'\n') + + +def equals_lf(line): + """ + Return True if line (a text or byte string) equals '\n'. + """ + return line == ('\n' if isinstance(line, six.text_type) else b'\n') diff --git a/docs/ref/files/file.txt b/docs/ref/files/file.txt index 6874936eab..57f8e9eab3 100644 --- a/docs/ref/files/file.txt +++ b/docs/ref/files/file.txt @@ -53,6 +53,15 @@ The ``File`` Class Iterate over the file yielding one line at a time. + .. versionchanged:: 1.8 + + ``File`` now uses `universal newlines`_. 
The following are + recognized as ending a line: the Unix end-of-line convention + ``'\n'``, the Windows convention ``'\r\n'``, and the old Macintosh + convention ``'\r'``. + + .. _universal newlines: http://www.python.org/dev/peps/pep-0278 + .. method:: chunks([chunk_size=None]) Iterate over the file yielding "chunks" of a given size. ``chunk_size`` diff --git a/docs/ref/files/uploads.txt b/docs/ref/files/uploads.txt index 1b9103bb3b..0817816197 100644 --- a/docs/ref/files/uploads.txt +++ b/docs/ref/files/uploads.txt @@ -82,10 +82,15 @@ Here are some useful attributes of ``UploadedFile``: for line in uploadedfile: do_something_with(line) - However, *unlike* standard Python files, :class:`UploadedFile` only - understands ``\n`` (also known as "Unix-style") line endings. If you know - that you need to handle uploaded files with different line endings, you'll - need to do so in your view. + Lines are split using `universal newlines`_. The following are recognized + as ending a line: the Unix end-of-line convention ``'\n'``, the Windows + convention ``'\r\n'``, and the old Macintosh convention ``'\r'``. + + .. _universal newlines: http://www.python.org/dev/peps/pep-0278 + + .. versionchanged:: 1.8 + + Previously lines were only split on the Unix end-of-line ``'\n'``. Subclasses of ``UploadedFile`` include: diff --git a/docs/releases/1.8.txt b/docs/releases/1.8.txt index 23c6deeef1..dbdff7b7e0 100644 --- a/docs/releases/1.8.txt +++ b/docs/releases/1.8.txt @@ -659,6 +659,13 @@ Miscellaneous * By default, :ref:`call_command <call-command>` now always skips the check framework (unless you pass it ``skip_checks=False``). +* When iterating over lines, :class:`~django.core.files.File` now uses + `universal newlines`_. The following are recognized as ending a line: the + Unix end-of-line convention ``'\n'``, the Windows convention ``'\r\n'``, and + the old Macintosh convention ``'\r'``. + + .. _universal newlines: http://www.python.org/dev/peps/pep-0278 + ..
_deprecated-features-1.8: Features deprecated in 1.8 diff --git a/tests/files/tests.py b/tests/files/tests.py index be243b2527..f2f1df3626 100644 --- a/tests/files/tests.py +++ b/tests/files/tests.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -from io import BytesIO +from io import BytesIO, StringIO import os import gzip import tempfile @@ -72,6 +72,54 @@ class FileTests(unittest.TestCase): file = File(BytesIO(b'one\ntwo\nthree')) self.assertEqual(list(file), [b'one\n', b'two\n', b'three']) + def test_file_iteration_windows_newlines(self): + """ + #8149 - File objects with \r\n line endings should yield lines + when iterated over. + """ + f = File(BytesIO(b'one\r\ntwo\r\nthree')) + self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three']) + + def test_file_iteration_mac_newlines(self): + """ + #8149 - File objects with \r line endings should yield lines + when iterated over. + """ + f = File(BytesIO(b'one\rtwo\rthree')) + self.assertEqual(list(f), [b'one\r', b'two\r', b'three']) + + def test_file_iteration_mixed_newlines(self): + f = File(BytesIO(b'one\rtwo\nthree\r\nfour')) + self.assertEqual(list(f), [b'one\r', b'two\n', b'three\r\n', b'four']) + + def test_file_iteration_with_unix_newline_at_chunk_boundary(self): + f = File(BytesIO(b'one\ntwo\nthree')) + # Set chunk size to create a boundary after \n: + # b'one\n... + # ^ + f.DEFAULT_CHUNK_SIZE = 4 + self.assertEqual(list(f), [b'one\n', b'two\n', b'three']) + + def test_file_iteration_with_windows_newline_at_chunk_boundary(self): + f = File(BytesIO(b'one\r\ntwo\r\nthree')) + # Set chunk size to create a boundary between \r and \n: + # b'one\r\n... + # ^ + f.DEFAULT_CHUNK_SIZE = 4 + self.assertEqual(list(f), [b'one\r\n', b'two\r\n', b'three']) + + def test_file_iteration_with_mac_newline_at_chunk_boundary(self): + f = File(BytesIO(b'one\rtwo\rthree')) + # Set chunk size to create a boundary after \r: + # b'one\r... 
+ # ^ + f.DEFAULT_CHUNK_SIZE = 4 + self.assertEqual(list(f), [b'one\r', b'two\r', b'three']) + + def test_file_iteration_with_text(self): + f = File(StringIO('one\ntwo\nthree')) + self.assertEqual(list(f), ['one\n', 'two\n', 'three']) + class NoNameFileTestCase(unittest.TestCase): """