mirror of
https://github.com/django/django.git
synced 2025-10-26 15:16:09 +00:00
Improved strip_tags and clarified documentation
It was not clear enough that strip_tags cannot guarantee that all unsafe HTML content is actually stripped. See also: https://www.djangoproject.com/weblog/2014/mar/22/strip-tags-advisory/
This commit is contained in:
@@ -118,7 +118,10 @@ linebreaks = allow_lazy(linebreaks, six.text_type)
|
||||
|
||||
class MLStripper(HTMLParser):
|
||||
def __init__(self):
    """Initialise the underlying HTMLParser and the collected-text buffer.

    The base-class initialiser is invoked exactly once: without arguments
    on Python 2 (``six.PY2``), and with ``strict=False`` otherwise.
    """
    if six.PY2:
        HTMLParser.__init__(self)
    else:
        # Request the non-strict parsing mode where the branch above
        # does not apply.
        HTMLParser.__init__(self, strict=False)
    self.reset()
    # Text fragments gathered while parsing are accumulated in this list.
    self.fed = []
|
||||
|
||||
@@ -135,16 +138,36 @@ class MLStripper(HTMLParser):
|
||||
return ''.join(self.fed)
|
||||
|
||||
|
||||
def strip_tags(value):
|
||||
"""Returns the given HTML with all tags stripped."""
|
||||
def _strip_once(value):
    """
    Internal tag stripping utility used by strip_tags.

    Runs ``value`` through a fresh ``MLStripper`` a single time and returns
    the text it collected. If feeding the parser raises ``HTMLParseError``,
    ``value`` is returned unchanged. If closing the parser fails, the text
    collected so far is returned followed by the parser's remaining
    ``rawdata``.
    """
    s = MLStripper()
    try:
        s.feed(value)
    except HTMLParseError:
        return value
    # close() is attempted only inside the handler below, so that every
    # failure it can raise reaches the partial-result fallback.
    try:
        s.close()
    except (HTMLParseError, UnboundLocalError):
        # UnboundLocalError because of http://bugs.python.org/issue17802
        # on Python 3.2, triggered by strict=False mode of HTMLParser
        return s.get_data() + s.rawdata
    return s.get_data()
|
||||
|
||||
|
||||
def strip_tags(value):
    """Return the given HTML after repeatedly stripping tags from it.

    Stripping passes are applied for as long as the text still contains
    ``<`` or ``>`` and the previous pass changed something. The loop stops
    as soon as a pass makes no progress, so the result may still contain
    angle brackets.
    """
    while '<' in value or '>' in value:
        stripped = _strip_once(value)
        if stripped == value:
            # _strip_once was not able to detect more tags
            break
        value = stripped
    return value
|
||||
strip_tags = allow_lazy(strip_tags)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user