mirror of
				https://github.com/django/django.git
				synced 2025-10-25 22:56:12 +00:00 
			
		
		
		
	[1.7.x] Improved strip_tags and clarified documentation
The fact that strip_tags cannot guarantee to really strip all
non-safe HTML content was not clear enough. Also see:
https://www.djangoproject.com/weblog/2014/mar/22/strip-tags-advisory/
Backport of 6ca6c36f82 from master.
			
			
This commit is contained in:
		| @@ -120,7 +120,10 @@ linebreaks = allow_lazy(linebreaks, six.text_type) | ||||
|  | ||||
| class MLStripper(HTMLParser): | ||||
|     def __init__(self): | ||||
|         HTMLParser.__init__(self) | ||||
|         if six.PY2: | ||||
|             HTMLParser.__init__(self) | ||||
|         else: | ||||
|             HTMLParser.__init__(self, strict=False) | ||||
|         self.reset() | ||||
|         self.fed = [] | ||||
|  | ||||
| @@ -137,16 +140,36 @@ class MLStripper(HTMLParser): | ||||
|         return ''.join(self.fed) | ||||
|  | ||||
|  | ||||
| def strip_tags(value): | ||||
|     """Returns the given HTML with all tags stripped.""" | ||||
| def _strip_once(value): | ||||
|     """ | ||||
|     Internal tag stripping utility used by strip_tags. | ||||
|     """ | ||||
|     s = MLStripper() | ||||
|     try: | ||||
|         s.feed(value) | ||||
|         s.close() | ||||
|     except HTMLParseError: | ||||
|         return value | ||||
|     try: | ||||
|         s.close() | ||||
|     except (HTMLParseError, UnboundLocalError) as err: | ||||
|         # UnboundLocalError because of http://bugs.python.org/issue17802 | ||||
|         # on Python 3.2, triggered by strict=False mode of HTMLParser | ||||
|         return s.get_data() + s.rawdata | ||||
|     else: | ||||
|         return s.get_data() | ||||
|  | ||||
|  | ||||
| def strip_tags(value): | ||||
|     """Returns the given HTML with all tags stripped.""" | ||||
|     while True: | ||||
|         if not ('<' in value or '>' in value): | ||||
|             return value | ||||
|         new_value = _strip_once(value) | ||||
|         if new_value == value: | ||||
|             # _strip_once was not able to detect more tags | ||||
|             return value | ||||
|         else: | ||||
|             value = new_value | ||||
| strip_tags = allow_lazy(strip_tags) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -2050,7 +2050,7 @@ If ``value`` is ``10``, the output will be ``1.000000E+01``. | ||||
| striptags | ||||
| ^^^^^^^^^ | ||||
|  | ||||
| Strips all [X]HTML tags. | ||||
| Makes all possible efforts to strip all [X]HTML tags. | ||||
|  | ||||
| For example:: | ||||
|  | ||||
| @@ -2059,6 +2059,16 @@ For example:: | ||||
| If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"``, the | ||||
| output will be ``"Joel is a slug"``. | ||||
|  | ||||
| .. admonition:: No safety guarantee | ||||
|  | ||||
|     Note that ``striptags`` doesn't give any guarantee about its output being | ||||
|     entirely HTML safe, particularly with invalid HTML input. So **NEVER** | ||||
|     apply the ``safe`` filter to a ``striptags`` output. | ||||
|     If you are looking for something more robust, you can use the ``bleach`` | ||||
|     Python library, notably its `clean`_ method. | ||||
|  | ||||
| .. _clean: http://bleach.readthedocs.org/en/latest/clean.html | ||||
|  | ||||
| .. templatefilter:: time | ||||
|  | ||||
| time | ||||
|   | ||||
| @@ -595,17 +595,23 @@ escaping HTML. | ||||
|  | ||||
| .. function:: strip_tags(value) | ||||
|  | ||||
|     Removes anything that looks like an html tag from the string, that is | ||||
|     anything contained within ``<>``. | ||||
|     Tries to remove anything that looks like an HTML tag from the string, that | ||||
|     is anything contained within ``<>``. | ||||
|     Absolutely NO guarantee is provided about the resulting string being entirely | ||||
|     HTML safe. So NEVER mark safe the result of a ``strip_tags`` call without | ||||
|     escaping it first, for example with :func:`~django.utils.html.escape`. | ||||
|  | ||||
|     For example:: | ||||
|  | ||||
|         strip_tags(value) | ||||
|  | ||||
|     If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` | ||||
|     the return value will be ``"Joel is a slug"``. Note that ``strip_tags`` | ||||
|     result may still contain unsafe HTML content, so you might use | ||||
|     :func:`~django.utils.html.escape` to make it a safe string. | ||||
|     the return value will be ``"Joel is a slug"``. | ||||
|  | ||||
|     If you are looking for a more robust solution, take a look at the `bleach`_ | ||||
|     Python library. | ||||
|  | ||||
|     .. _bleach: https://pypi.python.org/pypi/bleach | ||||
|  | ||||
|     .. versionchanged:: 1.6 | ||||
|  | ||||
|   | ||||
| @@ -82,6 +82,8 @@ class TestUtilsHtml(TestCase): | ||||
|             ('a<p a >b</p>c', 'abc'), | ||||
|             ('d<a:b c:d>e</p>f', 'def'), | ||||
|             ('<strong>foo</strong><a href="http://example.com">bar</a>', 'foobar'), | ||||
|             ('<sc<!-- -->ript>test<<!-- -->/script>', 'test'), | ||||
|             ('<script>alert()</script>&h', 'alert()&h'), | ||||
|         ) | ||||
|         for value, output in items: | ||||
|             self.check_output(f, value, output) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user