mirror of
				https://github.com/django/django.git
				synced 2025-10-25 22:56:12 +00:00 
			
		
		
		
	Improved strip_tags and clarified documentation
The fact that strip_tags cannot guarantee to really strip all non-safe HTML content was not clear enough. Also see: https://www.djangoproject.com/weblog/2014/mar/22/strip-tags-advisory/
This commit is contained in:
		| @@ -118,7 +118,10 @@ linebreaks = allow_lazy(linebreaks, six.text_type) | |||||||
|  |  | ||||||
| class MLStripper(HTMLParser): | class MLStripper(HTMLParser): | ||||||
|     def __init__(self): |     def __init__(self): | ||||||
|         HTMLParser.__init__(self) |         if six.PY2: | ||||||
|  |             HTMLParser.__init__(self) | ||||||
|  |         else: | ||||||
|  |             HTMLParser.__init__(self, strict=False) | ||||||
|         self.reset() |         self.reset() | ||||||
|         self.fed = [] |         self.fed = [] | ||||||
|  |  | ||||||
| @@ -135,16 +138,36 @@ class MLStripper(HTMLParser): | |||||||
|         return ''.join(self.fed) |         return ''.join(self.fed) | ||||||
|  |  | ||||||
|  |  | ||||||
| def strip_tags(value): | def _strip_once(value): | ||||||
|     """Returns the given HTML with all tags stripped.""" |     """ | ||||||
|  |     Internal tag stripping utility used by strip_tags. | ||||||
|  |     """ | ||||||
|     s = MLStripper() |     s = MLStripper() | ||||||
|     try: |     try: | ||||||
|         s.feed(value) |         s.feed(value) | ||||||
|         s.close() |  | ||||||
|     except HTMLParseError: |     except HTMLParseError: | ||||||
|         return value |         return value | ||||||
|  |     try: | ||||||
|  |         s.close() | ||||||
|  |     except (HTMLParseError, UnboundLocalError) as err: | ||||||
|  |         # UnboundLocalError because of http://bugs.python.org/issue17802 | ||||||
|  |         # on Python 3.2, triggered by strict=False mode of HTMLParser | ||||||
|  |         return s.get_data() + s.rawdata | ||||||
|     else: |     else: | ||||||
|         return s.get_data() |         return s.get_data() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def strip_tags(value): | ||||||
|  |     """Returns the given HTML with all tags stripped.""" | ||||||
|  |     while True: | ||||||
|  |         if not ('<' in value or '>' in value): | ||||||
|  |             return value | ||||||
|  |         new_value = _strip_once(value) | ||||||
|  |         if new_value == value: | ||||||
|  |             # _strip_once was not able to detect more tags | ||||||
|  |             return value | ||||||
|  |         else: | ||||||
|  |             value = new_value | ||||||
| strip_tags = allow_lazy(strip_tags) | strip_tags = allow_lazy(strip_tags) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1985,7 +1985,7 @@ If ``value`` is ``10``, the output will be ``1.000000E+01``. | |||||||
| striptags | striptags | ||||||
| ^^^^^^^^^ | ^^^^^^^^^ | ||||||
|  |  | ||||||
| Strips all [X]HTML tags. | Makes all possible efforts to strip all [X]HTML tags. | ||||||
|  |  | ||||||
| For example:: | For example:: | ||||||
|  |  | ||||||
| @@ -1994,6 +1994,16 @@ For example:: | |||||||
| If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"``, the | If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"``, the | ||||||
| output will be ``"Joel is a slug"``. | output will be ``"Joel is a slug"``. | ||||||
|  |  | ||||||
|  | .. admonition:: No safety guarantee | ||||||
|  |  | ||||||
|  |     Note that ``striptags`` doesn't give any guarantee about its output being | ||||||
|  |     entirely HTML safe, particularly with invalid HTML input. So **NEVER** | ||||||
|  |     apply the ``safe`` filter to a ``striptags`` output. | ||||||
|  |     If you are looking for something more robust, you can use the ``bleach`` | ||||||
|  |     Python library, notably its `clean`_ method. | ||||||
|  |  | ||||||
|  | .. _clean: http://bleach.readthedocs.org/en/latest/clean.html | ||||||
|  |  | ||||||
| .. templatefilter:: time | .. templatefilter:: time | ||||||
|  |  | ||||||
| time | time | ||||||
|   | |||||||
| @@ -595,17 +595,23 @@ escaping HTML. | |||||||
|  |  | ||||||
| .. function:: strip_tags(value) | .. function:: strip_tags(value) | ||||||
|  |  | ||||||
|     Removes anything that looks like an html tag from the string, that is |     Tries to remove anything that looks like an HTML tag from the string, that | ||||||
|     anything contained within ``<>``. |     is anything contained within ``<>``. | ||||||
|  |     Absolutely NO guarantee is provided about the resulting string being entirely | ||||||
|  |     HTML safe. So NEVER mark safe the result of a ``strip_tags`` call without | ||||||
|  |     escaping it first, for example with :func:`~django.utils.html.escape`. | ||||||
|  |  | ||||||
|     For example:: |     For example:: | ||||||
|  |  | ||||||
|         strip_tags(value) |         strip_tags(value) | ||||||
|  |  | ||||||
|     If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` |     If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` | ||||||
|     the return value will be ``"Joel is a slug"``. Note that ``strip_tags`` |     the return value will be ``"Joel is a slug"``. | ||||||
|     result may still contain unsafe HTML content, so you might use |  | ||||||
|     :func:`~django.utils.html.escape` to make it a safe string. |     If you are looking for a more robust solution, take a look at the `bleach`_ | ||||||
|  |     Python library. | ||||||
|  |  | ||||||
|  |     .. _bleach: https://pypi.python.org/pypi/bleach | ||||||
|  |  | ||||||
|     .. versionchanged:: 1.6 |     .. versionchanged:: 1.6 | ||||||
|  |  | ||||||
|   | |||||||
| @@ -80,6 +80,8 @@ class TestUtilsHtml(TestCase): | |||||||
|             ('a<p a >b</p>c', 'abc'), |             ('a<p a >b</p>c', 'abc'), | ||||||
|             ('d<a:b c:d>e</p>f', 'def'), |             ('d<a:b c:d>e</p>f', 'def'), | ||||||
|             ('<strong>foo</strong><a href="http://example.com">bar</a>', 'foobar'), |             ('<strong>foo</strong><a href="http://example.com">bar</a>', 'foobar'), | ||||||
|  |             ('<sc<!-- -->ript>test<<!-- -->/script>', 'test'), | ||||||
|  |             ('<script>alert()</script>&h', 'alert()&h'), | ||||||
|         ) |         ) | ||||||
|         for value, output in items: |         for value, output in items: | ||||||
|             self.check_output(f, value, output) |             self.check_output(f, value, output) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user