diff --git a/django/utils/html.py b/django/utils/html.py
index 22d3ae42fa..a4be3cf8e6 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -11,6 +11,7 @@ from django.utils.deprecation import RemovedInDjango60Warning
 from django.utils.encoding import punycode
 from django.utils.functional import Promise, keep_lazy, keep_lazy_text
 from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS
+from django.utils.markdown import find_closing_markdown_bracket, has_markdown_link
 from django.utils.regex_helper import _lazy_re_compile
 from django.utils.safestring import SafeData, SafeString, mark_safe
 from django.utils.text import normalize_newlines
@@ -278,6 +279,7 @@ class Urlizer:
 
     mailto_template = "mailto:{local}@{domain}"
     url_template = '<a href="{href}"{attrs}>{url}</a>'
+    markdown_url_template = '[{text}](<a href="{url}"{attrs}>{trimmed_url}</a>)'
 
     def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False):
         """
@@ -291,7 +293,17 @@ class Urlizer:
         """
         safe_input = isinstance(text, SafeData)
 
-        words = self.word_split_re.split(str(text))
+        text = str(text)
+        if has_markdown_link(text):
+            return self.handle_markdown_link(
+                text,
+                safe_input=safe_input,
+                trim_url_limit=trim_url_limit,
+                nofollow=nofollow,
+                autoescape=autoescape,
+            )
+
+        words = self.word_split_re.split(text)
         return "".join(
             [
                 self.handle_word(
@@ -305,6 +317,94 @@ class Urlizer:
             ]
         )
 
+    def handle_markdown_link(
+        self,
+        text,
+        *,
+        safe_input,
+        trim_url_limit=None,
+        nofollow=False,
+        autoescape=False,
+    ):
+        """
+        Replace the URL of each Markdown link (``[text](url)``) in text with
+        an HTML anchor, keeping the bracketed link text in place.
+
+        The anchor label is the URL trimmed to trim_url_limit. A backslash
+        escapes the character after it, so escaped brackets and parentheses
+        never delimit a link.
+
+        If autoescape is True and the input isn't safe, escape the link
+        parts as well as the text around the links.
+        """
+        nofollow_attr = ' rel="nofollow"' if nofollow else ""
+        should_escape = autoescape and not safe_input
+
+        def literal(chunk):
+            # Text outside of links is returned alongside the anchors, so it
+            # has to be escaped under the same conditions as the link parts.
+            return escape(chunk) if should_escape else chunk
+
+        result = []
+        i = 0
+        while i < len(text):
+            if text[i] == "\\":
+                # Copy the backslash together with the character it escapes.
+                result.append(literal(text[i : i + 2]))
+                i += 2
+                continue
+            close_bracket = -1
+            if text[i] == "[":
+                close_bracket = find_closing_markdown_bracket(text, i + 1)
+            if close_bracket == -1:
+                result.append(literal(text[i]))
+                i += 1
+                continue
+            if text[close_bracket + 1 : close_bracket + 2] != "(":
+                # Bracketed text without a URL: copy the whole span that is
+                # skipped, not only its opening bracket.
+                result.append(literal(text[i : close_bracket + 1]))
+                i = close_bracket + 1
+                continue
+            # Find the parenthesis closing the URL, allowing nested pairs.
+            close_paren = -1
+            paren_depth = 1
+            j = close_bracket + 2
+            while j < len(text):
+                if text[j] == "\\":
+                    j += 2
+                    continue
+                if text[j] == "(":
+                    paren_depth += 1
+                elif text[j] == ")":
+                    paren_depth -= 1
+                    if paren_depth == 0:
+                        close_paren = j
+                        break
+                j += 1
+            if close_paren == -1:
+                # Unbalanced parentheses: the bracket is plain text.
+                result.append(literal(text[i]))
+                i += 1
+                continue
+            link_text = text[i + 1 : close_bracket]
+            link_url = text[close_bracket + 2 : close_paren]
+            trimmed_url = self.trim_url(link_url, limit=trim_url_limit)
+            if should_escape:
+                link_text = escape(link_text)
+                link_url = escape(link_url)
+                trimmed_url = escape(trimmed_url)
+            result.append(
+                self.markdown_url_template.format(
+                    text=link_text,
+                    url=link_url,
+                    attrs=nofollow_attr,
+                    trimmed_url=trimmed_url,
+                )
+            )
+            i = close_paren + 1
+        return "".join(result)
+
     def handle_word(
         self,
         word,
diff --git a/django/utils/markdown.py b/django/utils/markdown.py
new file mode 100644
index 0000000000..c0213ed47a
--- /dev/null
+++ b/django/utils/markdown.py
@@ -0,0 +1,74 @@
+"""
+Helpers for finding Markdown links (``[text](url)``) in plain text.
+"""
+
+
+def find_closing_markdown_bracket(text, start):
+    """
+    Return the index of the "]" matching a "[" located just before start.
+
+    Scanning starts at index start. Nested bracket pairs are skipped and a
+    backslash escapes the character after it. Return -1 if the bracket is
+    never closed.
+    """
+    depth = 0
+    i = start
+    while i < len(text):
+        if text[i] == "\\":
+            i += 2
+            continue
+        if text[i] == "[":
+            depth += 1
+        elif text[i] == "]":
+            if depth == 0:
+                return i
+            depth -= 1
+        i += 1
+    return -1
+
+
+def has_markdown_link(text):
+    """
+    Return True if text contains at least one Markdown link.
+
+    A link is a closed "[...]" directly followed by a balanced "(...)" whose
+    content has at least one alphanumeric character. A backslash escapes the
+    character after it.
+    """
+
+    def is_valid_url(start, end):
+        # Any alphanumeric content qualifies, which covers absolute
+        # (http://, https://) as well as relative URLs.
+        return any(c.isalnum() for c in text[start:end])
+
+    i = 0
+    while i < len(text):
+        if text[i] == "\\":
+            i += 2
+            continue
+        if text[i] == "[":
+            close_bracket = find_closing_markdown_bracket(text, i + 1)
+            if (
+                close_bracket != -1
+                and close_bracket + 1 < len(text)
+                and text[close_bracket + 1] == "("
+            ):
+                j = close_bracket + 2
+                paren_depth = 1
+                while j < len(text):
+                    if text[j] == "\\":
+                        j += 2
+                        continue
+                    if text[j] == "(":
+                        paren_depth += 1
+                    elif text[j] == ")":
+                        paren_depth -= 1
+                        if paren_depth == 0:
+                            if is_valid_url(close_bracket + 2, j):
+                                return True
+                            break
+                    j += 1
+            i = close_bracket + 1 if close_bracket != -1 else i + 1
+        else:
+            i += 1
+    return False
diff --git a/tests/template_tests/filter_tests/test_urlize.py b/tests/template_tests/filter_tests/test_urlize.py
index 8f84e62c92..1b377883f9 100644
--- a/tests/template_tests/filter_tests/test_urlize.py
+++ b/tests/template_tests/filter_tests/test_urlize.py
@@ -320,8 +320,8 @@ class FunctionTests(SimpleTestCase):
         )
         self.assertEqual(
             urlize("[http://168.192.0.1](http://168.192.0.1)"),
-            '[<a href="http://168.192.0.1](http://168.192.0.1)" rel="nofollow">'
-            "http://168.192.0.1](http://168.192.0.1)</a>",
+            '[http://168.192.0.1](<a href="http://168.192.0.1" rel="nofollow">'
+            "http://168.192.0.1</a>)",
         )
 
     def test_wrapping_characters(self):
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index ad31b8cc5b..ecb9bc2ea2 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -17,6 +17,7 @@ from django.utils.html import (
     strip_spaces_between_tags,
     strip_tags,
     urlize,
+    urlizer,
 )
 from django.utils.safestring import mark_safe
 
@@ -356,3 +357,102 @@ class TestUtilsHtml(SimpleTestCase):
         for value in tests:
             with self.subTest(value=value):
                 self.assertEqual(urlize(value), value)
+
+    def test_handle_markdown_link(self):
+        tests = [
+            {
+                "input": "Here's a [link with [nested] brackets](https://example.com)",
+                "expected": "Here's a [link with [nested] brackets](<a href="
+                '"https://example.com">https://example.com</a>)',
+                "params": {
+                    "trim_url_limit": None,
+                    "nofollow": False,
+                    "autoescape": False,
+                },
+            },
+            {
+                "input": "Check out [this link](https://example.com/page(1))",
+                "expected": "Check out [this link]("
+                '<a href="https://example.com/page(1)">'
+                "https://example.com/page(1)</a>)",
+                "params": {
+                    "trim_url_limit": None,
+                    "nofollow": False,
+                    "autoescape": False,
+                },
+            },
+            {
+                "input": "Here's a [complex URL](https://example.com/"
+                "path?param1=value1&param2=value2#fragment)",
+                "expected": "Here&#x27;s a [complex URL]("
+                '<a href="https://example.com/path?param1=value1&amp;'
+                'param2=value2#fragment">'
+                "https://example.com/path?param1=value1&amp;"
+                "param2=value2#fragment</a>)",
+                "params": {
+                    "trim_url_limit": None,
+                    "nofollow": False,
+                    "autoescape": True,
+                },
+            },
+            {
+                "input": "Multiple [link1](https://example1.com) and "
+                "[link2](https://example2.com)",
+                "expected": 'Multiple [link1](<a href="https://example1.com">'
+                "https://example1.com</a>) and [link2]"
+                '(<a href="https://example2.com">https://example2.com</a>)',
+                "params": {
+                    "trim_url_limit": None,
+                    "nofollow": False,
+                    "autoescape": False,
+                },
+            },
+            {
+                "input": "This is a [broken link(https://example.com)",
+                "expected": "This is a [broken link(https://example.com)",
+                "params": {
+                    "trim_url_limit": None,
+                    "nofollow": False,
+                    "autoescape": False,
+                },
+            },
+            {
+                "input": "Here's a [very long URL](https://example.com/"
+                + "x" * 100
+                + ")",
+                "expected": (
+                    'Here\'s a [very long URL](<a href="https://example.com/'
+                    + "x" * 100
+                    + '">https://example.com/xxxxxxxxx…</a>)'
+                ),
+                "params": {
+                    "trim_url_limit": 30,
+                    "nofollow": False,
+                    "autoescape": False,
+                },
+            },
+            {
+                "input": "See [note] and [docs](https://example.com)",
+                "expected": "See [note] and [docs]("
+                '<a href="https://example.com">https://example.com</a>)',
+                "params": {
+                    "trim_url_limit": None,
+                    "nofollow": False,
+                    "autoescape": False,
+                },
+            },
+            {
+                "input": "<b>bold</b> [docs](https://example.com)",
+                "expected": "&lt;b&gt;bold&lt;/b&gt; [docs]("
+                '<a href="https://example.com" rel="nofollow">https://example.com</a>)',
+                "params": {
+                    "trim_url_limit": None,
+                    "nofollow": True,
+                    "autoescape": True,
+                },
+            },
+        ]
+        for test in tests:
+            with self.subTest(test=test):
+                output = urlizer(test["input"], **test["params"])
+                self.assertEqual(output, test["expected"])