From 48235ba807483fe349d2dc66aaeddc0d03f8b0d4 Mon Sep 17 00:00:00 2001
From: Jon Dufresne <jon.dufresne@gmail.com>
Date: Thu, 9 May 2019 06:55:32 -0700
Subject: [PATCH] Refs #30399 -- Made assertHTMLEqual normalize character and
 entity references.

---
 django/test/html.py           | 15 ++++++---------
 docs/releases/3.0.txt         |  5 +++++
 docs/topics/testing/tools.txt |  6 ++++--
 tests/test_utils/tests.py     | 25 +++++++++++++++++++++++++
 4 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/django/test/html.py b/django/test/html.py
index 8b064529b0..911872bb69 100644
--- a/django/test/html.py
+++ b/django/test/html.py
@@ -3,11 +3,14 @@
 import re
 from html.parser import HTMLParser
 
-WHITESPACE = re.compile(r'\s+')
+# ASCII whitespace is U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, or U+0020
+# SPACE. Other Unicode whitespace, e.g. U+00A0 NO-BREAK SPACE, isn't matched.
+# https://infra.spec.whatwg.org/#ascii-whitespace
+ASCII_WHITESPACE = re.compile(r'[\t\n\f\r ]+')
 
 
 def normalize_whitespace(string):
-    return WHITESPACE.sub(' ', string)
+    return ASCII_WHITESPACE.sub(' ', string)
 
 
 class Element:
@@ -144,7 +147,7 @@ class Parser(HTMLParser):
     )
 
     def __init__(self):
-        super().__init__(convert_charrefs=False)
+        super().__init__()
         self.root = RootElement()
         self.open_tags = []
         self.element_positions = {}
@@ -202,12 +205,6 @@ class Parser(HTMLParser):
     def handle_data(self, data):
         self.current.append(data)
 
-    def handle_charref(self, name):
-        self.current.append('&%s;' % name)
-
-    def handle_entityref(self, name):
-        self.current.append('&%s;' % name)
-
 
 def parse_html(html):
     """
diff --git a/docs/releases/3.0.txt b/docs/releases/3.0.txt
index 2b9c5c5ea0..335ab2c0d5 100644
--- a/docs/releases/3.0.txt
+++ b/docs/releases/3.0.txt
@@ -246,6 +246,11 @@ Tests
 * Tests and test cases to run can be selected by test name pattern using the
   new :option:`test -k` option.
 
+* HTML comparison, as used by
+  :meth:`~django.test.SimpleTestCase.assertHTMLEqual`, now treats text, character
+  references, and entity references that refer to the same character as
+  equivalent.
+
 URLs
 ~~~~
 
diff --git a/docs/topics/testing/tools.txt b/docs/topics/testing/tools.txt
index d9f508023c..6d37a7421d 100644
--- a/docs/topics/testing/tools.txt
+++ b/docs/topics/testing/tools.txt
@@ -1603,14 +1603,16 @@ your test suite.
     * The ordering of attributes of an HTML element is not significant.
     * Attributes without an argument are equal to attributes that equal in
       name and value (see the examples).
+    * Text, character references, and entity references that refer to the same
+      character are equivalent.
 
     The following examples are valid tests and don't raise any
     ``AssertionError``::
 
         self.assertHTMLEqual(
-            '<p>Hello <b>world!</p>',
+            '<p>Hello <b>&#x27;world&#x27;!</p>',
             '''<p>
-                Hello   <b>world! </b>
+                Hello   <b>&#39;world&#39;! </b>
             </p>'''
         )
         self.assertHTMLEqual(
diff --git a/tests/test_utils/tests.py b/tests/test_utils/tests.py
index 69a99d47d2..5b84bbd383 100644
--- a/tests/test_utils/tests.py
+++ b/tests/test_utils/tests.py
@@ -612,6 +612,31 @@ class HTMLEqualTests(SimpleTestCase):
             '<input type="text" id="id_name" />',
             '<input type="password" id="id_name" />')
 
+    def test_normalize_refs(self):
+        pairs = [
+            ('&#39;', '&#x27;'),
+            ('&#39;', "'"),
+            ('&#x27;', '&#39;'),
+            ('&#x27;', "'"),
+            ("'", '&#39;'),
+            ("'", '&#x27;'),
+            ('&amp;', '&#38;'),
+            ('&amp;', '&#x26;'),
+            ('&amp;', '&'),
+            ('&#38;', '&amp;'),
+            ('&#38;', '&#x26;'),
+            ('&#38;', '&'),
+            ('&#x26;', '&amp;'),
+            ('&#x26;', '&#38;'),
+            ('&#x26;', '&'),
+            ('&', '&amp;'),
+            ('&', '&#38;'),
+            ('&', '&#x26;'),
+        ]
+        for pair in pairs:
+            with self.subTest(repr(pair)):
+                self.assertHTMLEqual(*pair)
+
     def test_complex_examples(self):
         self.assertHTMLEqual(
             """<tr><th><label for="id_first_name">First name:</label></th>