1
0
mirror of https://github.com/django/django.git synced 2024-12-22 17:16:24 +00:00

Fixed #35533 -- Improved urlize function to handle markdown links correctly.

Updated the urlize function to correctly handle markdown links. Added tests to ensure the correct behavior of the urlize function with various markdown link inputs.
This commit is contained in:
DongwookKim0823 2024-06-24 14:34:14 +09:00
parent 72b7aecbbf
commit ee5b8e53cb
4 changed files with 231 additions and 3 deletions

View File

@ -11,6 +11,7 @@ from django.utils.deprecation import RemovedInDjango60Warning
from django.utils.encoding import punycode
from django.utils.functional import Promise, keep_lazy, keep_lazy_text
from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
from django.utils.markdown import find_closing_markdown_bracket, has_markdown_link
from django.utils.regex_helper import _lazy_re_compile
from django.utils.safestring import SafeData, SafeString, mark_safe
from django.utils.text import normalize_newlines
@ -278,6 +279,7 @@ class Urlizer:
mailto_template = "mailto:{local}@{domain}"
url_template = '<a href="{href}"{attrs}>{url}</a>'
markdown_url_template = '[{text}](<a href="{url}"{attrs}>{trimmed_url}</a>)'
def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False):
"""
@ -291,7 +293,17 @@ class Urlizer:
"""
safe_input = isinstance(text, SafeData)
words = self.word_split_re.split(str(text))
text = str(text)
if has_markdown_link(text):
return self.handle_markdown_link(
text,
safe_input=safe_input,
trim_url_limit=trim_url_limit,
nofollow=nofollow,
autoescape=autoescape,
)
words = self.word_split_re.split(text)
return "".join(
[
self.handle_word(
@ -305,6 +317,79 @@ class Urlizer:
]
)
def handle_markdown_link(
    self,
    text,
    *,
    safe_input,
    trim_url_limit=None,
    nofollow=False,
    autoescape=False,
):
    """
    Wrap the target of every Markdown inline link in ``text`` in an anchor.

    Each ``[label](target)`` occurrence is rewritten using
    ``markdown_url_template``: the label is kept, and the target becomes
    both the ``href`` and (after ``trim_url``) the visible anchor text.
    Brackets and parentheses may be nested, and a backslash makes the
    character that follows it literal. Everything that is not part of a
    complete link is copied to the output unchanged.

    ``safe_input`` tells whether the caller's input was already marked
    safe. The label, target and trimmed target are passed through
    ``escape()`` only when ``autoescape`` is true and ``safe_input`` is
    false. ``trim_url_limit`` is forwarded to ``trim_url`` and ``nofollow``
    adds ``rel="nofollow"`` to each generated anchor.

    Return the rewritten text as a plain ``str``.
    """
    # NOTE(review): text outside of links is never escaped here, even when
    # autoescape is requested, and the link target is used as the href
    # without any scheme validation or quoting -- confirm this is intended
    # before relying on the output being safe HTML.
    nofollow_attr = ' rel="nofollow"' if nofollow else ""
    must_escape = autoescape and not safe_input
    length = len(text)
    result = []
    i = 0
    while i < length:
        char = text[i]
        if char == "\\":
            # Copy the backslash and the character it escapes verbatim so
            # that an escaped "[" can never start a link.
            result.append(text[i : i + 2])
            i += 2
            continue
        if char != "[":
            result.append(char)
            i += 1
            continue

        close_bracket = find_closing_markdown_bracket(text, i + 1)
        if close_bracket == -1:
            # Unbalanced "[": treat it as an ordinary character.
            result.append(char)
            i += 1
            continue
        if text[close_bracket + 1 : close_bracket + 2] != "(":
            # A bracketed group that is not followed by "(" is not a link.
            # Copy the whole group; appending only the "[" while moving the
            # cursor past "]" would drop the text in between.
            result.append(text[i : close_bracket + 1])
            i = close_bracket + 1
            continue

        # Look for the ")" that balances the "(" right after the label.
        j = close_bracket + 2
        paren_depth = 1
        while j < length:
            current = text[j]
            if current == "\\":
                j += 2
                continue
            if current == "(":
                paren_depth += 1
            elif current == ")":
                paren_depth -= 1
                if paren_depth == 0:
                    break
            j += 1
        else:
            # The target is never closed: emit the "[" and rescan from the
            # next character.
            result.append(char)
            i += 1
            continue

        link_text = text[i + 1 : close_bracket]
        link_url = text[close_bracket + 2 : j]
        trimmed_url = self.trim_url(link_url, limit=trim_url_limit)
        if must_escape:
            link_text = escape(link_text)
            link_url = escape(link_url)
            trimmed_url = escape(trimmed_url)
        result.append(
            self.markdown_url_template.format(
                text=link_text,
                url=link_url,
                attrs=nofollow_attr,
                trimmed_url=trimmed_url,
            )
        )
        i = j + 1
    return "".join(result)
def handle_word(
self,
word,

67
django/utils/markdown.py Normal file
View File

@ -0,0 +1,67 @@
def find_closing_markdown_bracket(text, start):
    """
    Return the index of the "]" that closes an already-open bracket.

    Scanning begins at ``start``, which is expected to be the position just
    after the opening "[". Nested "[...]" pairs are skipped, and a backslash
    causes the character after it to be ignored. Return -1 when no matching
    "]" exists.
    """
    nesting = 0
    pos = start
    end = len(text)
    while pos < end:
        char = text[pos]
        if char == "\\":
            # Skip the backslash together with the character it escapes.
            pos += 2
            continue
        if char == "]":
            if not nesting:
                return pos
            nesting -= 1
        elif char == "[":
            nesting += 1
        pos += 1
    return -1
def has_markdown_link(text):
    """
    Return True if ``text`` contains a ``[label](target)`` Markdown link.

    A link only counts when its brackets and parentheses are balanced and
    its target passes ``is_valid_url``. Backslash-escaped characters are
    ignored while scanning. After a bracketed group has been examined, the
    scan resumes after its closing "]".
    """

    def is_valid_url(start, end):
        """
        Tell whether ``text[start:end]`` is acceptable as a link target:
        once stripped, it starts with an HTTP(S) scheme or contains at
        least one alphanumeric character.
        """
        candidate = text[start:end].strip()
        if candidate.startswith(("http://", "https://")):
            return True
        return any(ch.isalnum() for ch in candidate)

    length = len(text)
    pos = 0
    while pos < length:
        char = text[pos]
        if char == "\\":
            # An escaped character can never open a link.
            pos += 2
            continue
        if char != "[":
            pos += 1
            continue

        close_bracket = find_closing_markdown_bracket(text, pos + 1)
        if close_bracket == -1:
            pos += 1
            continue

        if text[close_bracket + 1 : close_bracket + 2] == "(":
            # Walk to the ")" that balances the "(" following the label.
            depth = 1
            cursor = close_bracket + 2
            while cursor < length:
                current = text[cursor]
                if current == "\\":
                    cursor += 2
                    continue
                if current == "(":
                    depth += 1
                elif current == ")":
                    depth -= 1
                    if depth == 0:
                        if is_valid_url(close_bracket + 2, cursor):
                            return True
                        break
                cursor += 1
        # Not a usable link: continue after the closing bracket.
        pos = close_bracket + 1
    return False

View File

@ -320,8 +320,8 @@ class FunctionTests(SimpleTestCase):
)
self.assertEqual(
urlize("[http://168.192.0.1](http://168.192.0.1)"),
'[<a href="http://168.192.0.1](http://168.192.0.1)" rel="nofollow">'
"http://168.192.0.1](http://168.192.0.1)</a>",
'[http://168.192.0.1](<a href="http://168.192.0.1" rel="nofollow">'
"http://168.192.0.1</a>)",
)
def test_wrapping_characters(self):

View File

@ -17,6 +17,7 @@ from django.utils.html import (
strip_spaces_between_tags,
strip_tags,
urlize,
urlizer,
)
from django.utils.safestring import mark_safe
@ -356,3 +357,78 @@ class TestUtilsHtml(SimpleTestCase):
for value in tests:
with self.subTest(value=value):
self.assertEqual(urlize(value), value)
def test_handle_markdown_link(self):
    """
    urlizer() wraps only the target of Markdown links in an anchor, for
    nested brackets, parenthesized URLs, escaped query strings, several
    links in one string, a malformed link, and a trimmed long URL.
    """
    # Options shared by every case unless explicitly overridden.
    base_params = {
        "trim_url_limit": None,
        "nofollow": False,
        "autoescape": False,
    }
    long_path = "x" * 100
    tests = [
        {
            "input": "Here's a [link with [nested] brackets](https://example.com)",
            "expected": (
                "Here's a [link with [nested] brackets]"
                '(<a href="https://example.com">https://example.com</a>)'
            ),
            "params": dict(base_params),
        },
        {
            "input": "Check out [this link](https://example.com/page(1))",
            "expected": (
                "Check out [this link]"
                '(<a href="https://example.com/page(1)">'
                "https://example.com/page(1)</a>)"
            ),
            "params": dict(base_params),
        },
        {
            "input": (
                "Here's a [complex URL]"
                "(https://example.com/path?param1=value1&param2=value2#fragment)"
            ),
            "expected": (
                "Here's a [complex URL]"
                '(<a href="https://example.com/path?param1=value1&amp;'
                'param2=value2#fragment">'
                "https://example.com/path?param1=value1&amp;"
                "param2=value2#fragment</a>)"
            ),
            "params": {**base_params, "autoescape": True},
        },
        {
            "input": (
                "Multiple [link1](https://example1.com) and "
                "[link2](https://example2.com)"
            ),
            "expected": (
                "Multiple "
                '[link1](<a href="https://example1.com">https://example1.com</a>)'
                " and "
                '[link2](<a href="https://example2.com">https://example2.com</a>)'
            ),
            "params": dict(base_params),
        },
        {
            # No "](" sequence, so the text must come back untouched.
            "input": "This is a [broken link(https://example.com)",
            "expected": "This is a [broken link(https://example.com)",
            "params": dict(base_params),
        },
        {
            "input": f"Here's a [very long URL](https://example.com/{long_path})",
            "expected": (
                "Here's a [very long URL]"
                f'(<a href="https://example.com/{long_path}">'
                "https://example.com/xxxxxxxxx…</a>)"
            ),
            "params": {**base_params, "trim_url_limit": 30},
        },
    ]
    for test in tests:
        with self.subTest(test=test):
            output = urlizer(test["input"], **test["params"])
            self.assertEqual(output, test["expected"])