From ee5b8e53cbb502c2c1890ca1cc3c53e3bc67709a Mon Sep 17 00:00:00 2001
From: DongwookKim0823 <dwkim0823@naver.com>
Date: Mon, 24 Jun 2024 14:34:14 +0900
Subject: [PATCH] Fixed #35533 -- Improved urlize function to handle markdown
 links correctly.

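Updated urlize so that, for a Markdown link of the form [text](url), only
the URL inside the parentheses is wrapped in an anchor; previously the
"](" separator and the trailing ")" were swallowed into the href. Added
tests covering nested brackets in the link text, parentheses in the URL,
autoescaping, multiple links, an unclosed bracket, and trim_url_limit.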
---
 django/utils/html.py                          | 87 ++++++++++++++++++-
 django/utils/markdown.py                      | 67 ++++++++++++++
 .../filter_tests/test_urlize.py               |  4 +-
 tests/utils_tests/test_html.py                | 76 ++++++++++++++++
 4 files changed, 231 insertions(+), 3 deletions(-)
 create mode 100644 django/utils/markdown.py
diff --git a/django/utils/html.py b/django/utils/html.py
index 22d3ae42fa..a4be3cf8e6 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -11,6 +11,7 @@ from django.utils.deprecation import RemovedInDjango60Warning
 from django.utils.encoding import punycode
 from django.utils.functional import Promise, keep_lazy, keep_lazy_text
 from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
+from django.utils.markdown import find_closing_markdown_bracket, has_markdown_link
 from django.utils.regex_helper import _lazy_re_compile
 from django.utils.safestring import SafeData, SafeString, mark_safe
 from django.utils.text import normalize_newlines
@@ -278,6 +279,7 @@ class Urlizer:
 
     mailto_template = "mailto:{local}@{domain}"
     url_template = '<a href="{href}"{attrs}>{url}</a>'
+    markdown_url_template = '[{text}](<a href="{url}"{attrs}>{trimmed_url}</a>)'
 
     def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False):
         """
@@ -291,7 +293,17 @@ class Urlizer:
         """
         safe_input = isinstance(text, SafeData)
 
-        words = self.word_split_re.split(str(text))
+        text = str(text)
+        if has_markdown_link(text):
+            return self.handle_markdown_link(
+                text,
+                safe_input=safe_input,
+                trim_url_limit=trim_url_limit,
+                nofollow=nofollow,
+                autoescape=autoescape,
+            )
+
+        words = self.word_split_re.split(text)
         return "".join(
             [
                 self.handle_word(
@@ -305,6 +317,79 @@ class Urlizer:
             ]
         )
 
+    def handle_markdown_link(
+        self,
+        text,
+        *,
+        safe_input,
+        trim_url_limit=None,
+        nofollow=False,
+        autoescape=False,
+    ):
+        """
+        Wrap the URL of each Markdown link ``[label](url)`` in an anchor.
+
+        The label and the surrounding Markdown syntax are kept. The label and
+        URL are HTML-escaped only if autoescape is set and safe_input is not.
+        trim_url_limit caps the length of the displayed URL (the href keeps
+        the full URL) and nofollow adds rel="nofollow" to each anchor.
+        """
+        # NOTE(review): text outside the links is emitted unescaped even with
+        # autoescape, and bare URLs there are not linked -- confirm intent.
+        nofollow_attr = ' rel="nofollow"' if nofollow else ""
+        escape_parts = autoescape and not safe_input
+        size = len(text)
+        result = []
+        i = 0
+        while i < size:
+            if text[i] == "\\":
+                # Copy a backslash escape through as one unit.
+                result.append(text[i : i + 2])
+                i += 2
+                continue
+            link_end = -1
+            if text[i] == "[":
+                close_bracket = find_closing_markdown_bracket(text, i + 1)
+                if close_bracket != -1 and text.startswith("(", close_bracket + 1):
+                    # Find the ")" balancing the "(" that opens the URL.
+                    depth = 1
+                    j = close_bracket + 2
+                    while j < size and depth:
+                        if text[j] == "\\":
+                            j += 2
+                            continue
+                        if text[j] == "(":
+                            depth += 1
+                        elif text[j] == ")":
+                            depth -= 1
+                        j += 1
+                    if depth == 0:
+                        link_end = j - 1
+            if link_end == -1:
+                # Not a complete link: emit this character only and rescan
+                # from the next one, so bracketed text that is not followed
+                # by "(" is preserved rather than skipped.
+                result.append(text[i])
+                i += 1
+                continue
+            link_text = text[i + 1 : close_bracket]
+            link_url = text[close_bracket + 2 : link_end]
+            trimmed_url = self.trim_url(link_url, limit=trim_url_limit)
+            if escape_parts:
+                link_text = escape(link_text)
+                link_url = escape(link_url)
+                trimmed_url = escape(trimmed_url)
+            result.append(
+                self.markdown_url_template.format(
+                    text=link_text,
+                    url=link_url,
+                    attrs=nofollow_attr,
+                    trimmed_url=trimmed_url,
+                )
+            )
+            i = link_end + 1
+        return "".join(result)
+
     def handle_word(
         self,
         word,
diff --git a/django/utils/markdown.py b/django/utils/markdown.py
new file mode 100644
index 0000000000..c0213ed47a
--- /dev/null
+++ b/django/utils/markdown.py
@@ -0,0 +1,67 @@
+def find_closing_markdown_bracket(text, start):
+    """
+    Scan ``text`` from ``start`` and return the index of the first unnested,
+    unescaped "]" (nested "[...]" pairs are balanced), or -1 if none exists.
+    """
+    pos, nested, end = start, 0, len(text)
+    while pos < end:
+        char = text[pos]
+        if char == "\\":
+            pos += 1  # Also step over the escaped character.
+        elif char == "[":
+            nested += 1
+        elif char == "]":
+            if not nested:
+                return pos
+            nested -= 1
+        pos += 1
+    return -1
+
+
+def has_markdown_link(text):
+    """
+    Return True if ``text`` contains a Markdown link ``[label](target)`` whose
+    target has at least one alphanumeric character, and False otherwise.
+
+    A backslash-escaped character is never treated as link syntax.
+    """
+
+    def target_end(begin):
+        """Return the index of the ")" closing the target, or -1 if absent."""
+        level, cursor = 1, begin
+        while cursor < len(text):
+            symbol = text[cursor]
+            if symbol == "\\":
+                cursor += 1
+            elif symbol == "(":
+                level += 1
+            elif symbol == ")":
+                level -= 1
+                if level == 0:
+                    return cursor
+            cursor += 1
+        return -1
+
+    position = 0
+    while position < len(text):
+        current = text[position]
+        if current == "\\":
+            position += 2
+            continue
+        if current != "[":
+            position += 1
+            continue
+        closing = find_closing_markdown_bracket(text, position + 1)
+        if closing == -1:
+            position += 1
+            continue
+        if text[closing + 1 : closing + 2] == "(":
+            stop = target_end(closing + 2)
+            # A target starting with "http://" or "https://" always contains
+            # an alphanumeric character, so this one check covers it too.
+            if stop != -1 and any(c.isalnum() for c in text[closing + 2 : stop]):
+                return True
+        # Resume after the "]": the bracketed label is not rescanned, so a
+        # link nested inside a label that is not itself a link is not seen.
+        position = closing + 1
+    return False
diff --git a/tests/template_tests/filter_tests/test_urlize.py b/tests/template_tests/filter_tests/test_urlize.py
index 8f84e62c92..1b377883f9 100644
--- a/tests/template_tests/filter_tests/test_urlize.py
+++ b/tests/template_tests/filter_tests/test_urlize.py
@@ -320,8 +320,8 @@ class FunctionTests(SimpleTestCase):
         )
         self.assertEqual(
             urlize("[http://168.192.0.1](http://168.192.0.1)"),
-            '[<a href="http://168.192.0.1](http://168.192.0.1)" rel="nofollow">'
-            "http://168.192.0.1](http://168.192.0.1)</a>",
+            '[http://168.192.0.1](<a href="http://168.192.0.1" rel="nofollow">'
+            "http://168.192.0.1</a>)",
         )
 
     def test_wrapping_characters(self):
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index ad31b8cc5b..ecb9bc2ea2 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -17,6 +17,7 @@ from django.utils.html import (
     strip_spaces_between_tags,
     strip_tags,
     urlize,
+    urlizer,
 )
 from django.utils.safestring import mark_safe
 
@@ -356,3 +357,78 @@ class TestUtilsHtml(SimpleTestCase):
         for value in tests:
             with self.subTest(value=value):
                 self.assertEqual(urlize(value), value)
+
+    def test_handle_markdown_link(self):
+        """
+        urlizer() wraps only the URL of a Markdown link in an anchor; the
+        link text and the surrounding Markdown syntax are left in place.
+
+        Each case is (input, expected output, non-default urlizer options).
+        """
+        defaults = {
+            "trim_url_limit": None,
+            "nofollow": False,
+            "autoescape": False,
+        }
+        long_url = "https://example.com/" + "x" * 100
+        cases = [
+            # Brackets nested inside the link text do not end the text early.
+            (
+                "Here's a [link with [nested] brackets](https://example.com)",
+                "Here's a [link with [nested] brackets]"
+                '(<a href="https://example.com">https://example.com</a>)',
+                {},
+            ),
+            # Balanced parentheses inside the URL stay part of the URL.
+            (
+                "Check out [this link](https://example.com/page(1))",
+                'Check out [this link](<a href="https://example.com/page(1)">'
+                "https://example.com/page(1)</a>)",
+                {},
+            ),
+            # With autoescape on, "&" in the URL becomes "&amp;" in both the
+            # href and the visible URL.
+            (
+                "Here's a [complex URL]"
+                "(https://example.com/path?param1=value1&param2=value2#fragment)",
+                "Here's a [complex URL]"
+                '(<a href="https://example.com/path?param1=value1&amp;param2=value2'
+                '#fragment">https://example.com/path?param1=value1&amp;param2=value2'
+                "#fragment</a>)",
+                {"autoescape": True},
+            ),
+            # Every link in the string is converted, not just the first.
+            (
+                "Multiple [link1](https://example1.com) and "
+                "[link2](https://example2.com)",
+                'Multiple [link1](<a href="https://example1.com">'
+                "https://example1.com</a>) and [link2]"
+                '(<a href="https://example2.com">https://example2.com</a>)',
+                {},
+            ),
+            # Without a closing "]" there is no Markdown link and the input
+            # comes back unchanged.
+            (
+                "This is a [broken link(https://example.com)",
+                "This is a [broken link(https://example.com)",
+                {},
+            ),
+            # trim_url_limit shortens the visible URL; the href keeps the
+            # full URL.
+            (
+                "Here's a [very long URL](" + long_url + ")",
+                "Here's a [very long URL](<a href=\""
+                + long_url
+                + '">https://example.com/xxxxxxxxx…</a>)',
+                {"trim_url_limit": 30},
+            ),
+        ]
+        for value, expected, overrides in cases:
+            test = {
+                "input": value,
+                "expected": expected,
+                "params": {**defaults, **overrides},
+            }
+            with self.subTest(test=test):
+                output = urlizer(test["input"], **test["params"])
+                self.assertEqual(output, test["expected"])