mirror of
https://github.com/django/django.git
synced 2025-10-26 15:16:09 +00:00
Merged Unicode branch into trunk (r4952:5608). This should be fully
backwards compatible for all practical purposes.

Fixed #2391, #2489, #2996, #3322, #3344, #3370, #3406, #3432, #3454, #3492, #3582, #3690, #3878, #3891, #3937, #4039, #4141, #4227, #4286, #4291, #4300, #4452, #4702

git-svn-id: http://code.djangoproject.com/svn/django/trunk@5609 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
"HTML utilities suitable for global use."
|
||||
|
||||
import re, string
|
||||
from django.utils.encoding import smart_unicode
|
||||
import re
|
||||
import string
|
||||
import urllib
|
||||
from django.utils.encoding import force_unicode, smart_str
|
||||
from django.utils.functional import allow_lazy
|
||||
|
||||
# Configuration for urlize() function
|
||||
LEADING_PUNCTUATION = ['(', '<', '&lt;']
|
||||
@@ -24,32 +27,36 @@ del x # Temporary variable
|
||||
|
||||
def escape(html):
|
||||
"Returns the given HTML with ampersands, quotes and carets encoded"
|
||||
if not isinstance(html, basestring):
|
||||
html = str(html)
|
||||
return html.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
|
||||
return force_unicode(html).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;')
|
||||
escape = allow_lazy(escape, unicode)
|
||||
|
||||
def linebreaks(value):
|
||||
"Converts newlines into <p> and <br />s"
|
||||
value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines
|
||||
value = re.sub(r'\r\n|\r|\n', '\n', force_unicode(value)) # normalize newlines
|
||||
paras = re.split('\n{2,}', value)
|
||||
paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
|
||||
return '\n\n'.join(paras)
|
||||
paras = [u'<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras]
|
||||
return u'\n\n'.join(paras)
|
||||
linebreaks = allow_lazy(linebreaks, unicode)
|
||||
|
||||
def strip_tags(value):
|
||||
"Returns the given HTML with all tags stripped"
|
||||
return re.sub(r'<[^>]*?>', '', value)
|
||||
return re.sub(r'<[^>]*?>', '', force_unicode(value))
|
||||
strip_tags = allow_lazy(strip_tags)
|
||||
|
||||
def strip_spaces_between_tags(value):
|
||||
"Returns the given HTML with spaces between tags removed"
|
||||
return re.sub(r'>\s+<', '><', value)
|
||||
return re.sub(r'>\s+<', '><', force_unicode(value))
|
||||
strip_spaces_between_tags = allow_lazy(strip_spaces_between_tags, unicode)
|
||||
|
||||
def strip_entities(value):
|
||||
"Returns the given HTML with all entities (&something;) stripped"
|
||||
return re.sub(r'&(?:\w+|#\d);', '', value)
|
||||
return re.sub(r'&(?:\w+|#\d);', '', force_unicode(value))
|
||||
strip_entities = allow_lazy(strip_entities, unicode)
|
||||
|
||||
def fix_ampersands(value):
|
||||
"Returns the given HTML with all unencoded ampersands encoded correctly"
|
||||
return unencoded_ampersands_re.sub('&amp;', value)
|
||||
return unencoded_ampersands_re.sub('&amp;', force_unicode(value))
|
||||
fix_ampersands = allow_lazy(fix_ampersands, unicode)
|
||||
|
||||
def urlize(text, trim_url_limit=None, nofollow=False):
|
||||
"""
|
||||
@@ -65,7 +72,7 @@ def urlize(text, trim_url_limit=None, nofollow=False):
|
||||
attribute.
|
||||
"""
|
||||
trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x
|
||||
words = word_split_re.split(text)
|
||||
words = word_split_re.split(force_unicode(text))
|
||||
nofollow_attr = nofollow and ' rel="nofollow"' or ''
|
||||
for i, word in enumerate(words):
|
||||
match = punctuation_re.match(word)
|
||||
@@ -82,7 +89,8 @@ def urlize(text, trim_url_limit=None, nofollow=False):
|
||||
middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
|
||||
if lead + middle + trail != word:
|
||||
words[i] = lead + middle + trail
|
||||
return ''.join(words)
|
||||
return u''.join(words)
|
||||
urlize = allow_lazy(urlize, unicode)
|
||||
|
||||
def clean_html(text):
|
||||
"""
|
||||
@@ -97,7 +105,7 @@ def clean_html(text):
|
||||
bottom of the text.
|
||||
"""
|
||||
from django.utils.text import normalize_newlines
|
||||
text = normalize_newlines(text)
|
||||
text = normalize_newlines(force_unicode(text))
|
||||
text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
|
||||
text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
|
||||
text = fix_ampersands(text)
|
||||
@@ -110,9 +118,10 @@ def clean_html(text):
|
||||
s = match.group().replace('</p>', '</li>')
|
||||
for d in DOTS:
|
||||
s = s.replace('<p>%s' % d, '<li>')
|
||||
return '<ul>\n%s\n</ul>' % s
|
||||
return u'<ul>\n%s\n</ul>' % s
|
||||
text = hard_coded_bullets_re.sub(replace_p_tags, text)
|
||||
# Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom of the text.
|
||||
text = trailing_empty_content_re.sub('', text)
|
||||
return text
|
||||
clean_html = allow_lazy(clean_html, unicode)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user