1
0
mirror of https://github.com/django/django.git synced 2025-06-05 11:39:13 +00:00

Fixed #21389 -- Accept most valid language codes

By removing the 'supported' keyword from the detection methods and relying only
on a cached settings.LANGUAGES, the speed of said methods has been improved by
roughly 4x in raw performance. This allows us to stop checking Python's incomplete
list of locales, and rely on a less restrictive regular expression for
accepting certain locales.

HTTP Accept-Language is defined as being case-insensitive. Based on this fact,
extra performance improvements have been made, as it wouldn't make sense to
check for case differences.
This commit is contained in:
Bouke Haarsma 2013-11-12 07:54:01 +01:00 committed by Claude Paroz
parent 48a8b714d4
commit 2bab9d6d9e
6 changed files with 68 additions and 58 deletions

View File

@ -1,7 +1,5 @@
"This is the locale selecting middleware that will look at accept headers" "This is the locale selecting middleware that will look at accept headers"
from collections import OrderedDict
from django.conf import settings from django.conf import settings
from django.core.urlresolvers import (is_valid_path, get_resolver, from django.core.urlresolvers import (is_valid_path, get_resolver,
LocaleRegexURLResolver) LocaleRegexURLResolver)
@ -21,7 +19,6 @@ class LocaleMiddleware(object):
response_redirect_class = HttpResponseRedirect response_redirect_class = HttpResponseRedirect
def __init__(self): def __init__(self):
self._supported_languages = OrderedDict(settings.LANGUAGES)
self._is_language_prefix_patterns_used = False self._is_language_prefix_patterns_used = False
for url_pattern in get_resolver(None).url_patterns: for url_pattern in get_resolver(None).url_patterns:
if isinstance(url_pattern, LocaleRegexURLResolver): if isinstance(url_pattern, LocaleRegexURLResolver):
@ -37,9 +34,7 @@ class LocaleMiddleware(object):
def process_response(self, request, response): def process_response(self, request, response):
language = translation.get_language() language = translation.get_language()
language_from_path = translation.get_language_from_path( language_from_path = translation.get_language_from_path(request.path_info)
request.path_info, supported=self._supported_languages
)
if (response.status_code == 404 and not language_from_path if (response.status_code == 404 and not language_from_path
and self.is_language_prefix_patterns_used()): and self.is_language_prefix_patterns_used()):
urlconf = getattr(request, 'urlconf', None) urlconf = getattr(request, 'urlconf', None)

View File

@ -187,8 +187,8 @@ def get_language_from_request(request, check_path=False):
return _trans.get_language_from_request(request, check_path) return _trans.get_language_from_request(request, check_path)
def get_language_from_path(path, supported=None): def get_language_from_path(path):
return _trans.get_language_from_path(path, supported=supported) return _trans.get_language_from_path(path)
def templatize(src, origin=None): def templatize(src, origin=None):

View File

@ -68,5 +68,5 @@ def get_language_from_request(request, check_path=False):
return settings.LANGUAGE_CODE return settings.LANGUAGE_CODE
def get_language_from_path(request, supported=None): def get_language_from_path(request):
return None return None

View File

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from collections import OrderedDict from collections import OrderedDict
import locale
import os import os
import re import re
import sys import sys
@ -29,9 +28,9 @@ _active = local()
# The default translation is based on the settings file. # The default translation is based on the settings file.
_default = None _default = None
# This is a cache for normalized accept-header languages to prevent multiple # This is a cache of settings.LANGUAGES in an OrderedDict for easy lookups by
# file lookups when checking the same locale on repeated requests. # key
_accepted = {} _supported = None
# magic gettext number to separate context from message # magic gettext number to separate context from message
CONTEXT_SEPARATOR = "\x04" CONTEXT_SEPARATOR = "\x04"
@ -63,9 +62,11 @@ def reset_cache(**kwargs):
Reset global state when LANGUAGES setting has been changed, as some Reset global state when LANGUAGES setting has been changed, as some
languages should no longer be accepted. languages should no longer be accepted.
""" """
if kwargs['setting'] == 'LANGUAGES': if kwargs['setting'] in ('LANGUAGES', 'LANGUAGE_CODE'):
global _accepted global _supported
_accepted = {} _supported = None
check_for_language.cache_clear()
get_supported_language_variant.cache_clear()
def to_locale(language, to_lower=False): def to_locale(language, to_lower=False):
@ -388,7 +389,7 @@ def all_locale_paths():
return [globalpath] + list(settings.LOCALE_PATHS) return [globalpath] + list(settings.LOCALE_PATHS)
@lru_cache.lru_cache(maxsize=None) @lru_cache.lru_cache()
def check_for_language(lang_code): def check_for_language(lang_code):
""" """
Checks whether there is a global language file for the given language Checks whether there is a global language file for the given language
@ -404,39 +405,42 @@ def check_for_language(lang_code):
return False return False
def get_supported_language_variant(lang_code, supported=None, strict=False): @lru_cache.lru_cache(maxsize=1000)
def get_supported_language_variant(lang_code, strict=False):
""" """
Returns the language-code that's listed in supported languages, possibly Returns the language-code that's listed in supported languages, possibly
selecting a more generic variant. Raises LookupError if nothing found. selecting a more generic variant. Raises LookupError if nothing found.
If `strict` is False (the default), the function will look for an alternative If `strict` is False (the default), the function will look for an alternative
country-specific variant when the currently checked is not found. country-specific variant when the currently checked is not found.
lru_cache should have a maxsize to prevent memory exhaustion attacks,
as the provided language codes are taken from the HTTP request. See also
<https://www.djangoproject.com/weblog/2007/oct/26/security-fix/>.
""" """
if supported is None: global _supported
if _supported is None:
from django.conf import settings from django.conf import settings
supported = OrderedDict(settings.LANGUAGES) _supported = OrderedDict(settings.LANGUAGES)
if lang_code: if lang_code:
# some browsers use deprecated language codes -- #18419 # some browsers use deprecated language codes -- #18419
replacement = _BROWSERS_DEPRECATED_LOCALES.get(lang_code) replacement = _BROWSERS_DEPRECATED_LOCALES.get(lang_code)
if lang_code not in supported and replacement in supported: if lang_code not in _supported and replacement in _supported:
return replacement return replacement
# if fr-CA is not supported, try fr-ca; if that fails, fallback to fr. # if fr-ca is not supported, try fr.
generic_lang_code = lang_code.split('-')[0] generic_lang_code = lang_code.split('-')[0]
variants = (lang_code, lang_code.lower(), generic_lang_code, for code in (lang_code, generic_lang_code):
generic_lang_code.lower()) if code in _supported and check_for_language(code):
for code in variants:
if code in supported and check_for_language(code):
return code return code
if not strict: if not strict:
# if fr-fr is not supported, try fr-ca. # if fr-fr is not supported, try fr-ca.
for supported_code in supported: for supported_code in _supported:
if supported_code.startswith((generic_lang_code + '-', if supported_code.startswith(generic_lang_code + '-'):
generic_lang_code.lower() + '-')):
return supported_code return supported_code
raise LookupError(lang_code) raise LookupError(lang_code)
def get_language_from_path(path, supported=None, strict=False): def get_language_from_path(path, strict=False):
""" """
Returns the language-code if there is a valid language-code Returns the language-code if there is a valid language-code
found in the `path`. found in the `path`.
@ -444,15 +448,12 @@ def get_language_from_path(path, supported=None, strict=False):
If `strict` is False (the default), the function will look for an alternative If `strict` is False (the default), the function will look for an alternative
country-specific variant when the currently checked is not found. country-specific variant when the currently checked is not found.
""" """
if supported is None:
from django.conf import settings
supported = OrderedDict(settings.LANGUAGES)
regex_match = language_code_prefix_re.match(path) regex_match = language_code_prefix_re.match(path)
if not regex_match: if not regex_match:
return None return None
lang_code = regex_match.group(1) lang_code = regex_match.group(1)
try: try:
return get_supported_language_variant(lang_code, supported, strict=strict) return get_supported_language_variant(lang_code, strict=strict)
except LookupError: except LookupError:
return None return None
@ -467,25 +468,26 @@ def get_language_from_request(request, check_path=False):
If check_path is True, the URL path prefix will be checked for a language If check_path is True, the URL path prefix will be checked for a language
code, otherwise this is skipped for backwards compatibility. code, otherwise this is skipped for backwards compatibility.
""" """
global _accepted
from django.conf import settings from django.conf import settings
supported = OrderedDict(settings.LANGUAGES) global _supported
if _supported is None:
_supported = OrderedDict(settings.LANGUAGES)
if check_path: if check_path:
lang_code = get_language_from_path(request.path_info, supported) lang_code = get_language_from_path(request.path_info)
if lang_code is not None: if lang_code is not None:
return lang_code return lang_code
if hasattr(request, 'session'): if hasattr(request, 'session'):
# for backwards compatibility django_language is also checked (remove in 1.8) # for backwards compatibility django_language is also checked (remove in 1.8)
lang_code = request.session.get(LANGUAGE_SESSION_KEY, request.session.get('django_language')) lang_code = request.session.get(LANGUAGE_SESSION_KEY, request.session.get('django_language'))
if lang_code in supported and lang_code is not None and check_for_language(lang_code): if lang_code in _supported and lang_code is not None and check_for_language(lang_code):
return lang_code return lang_code
lang_code = request.COOKIES.get(settings.LANGUAGE_COOKIE_NAME) lang_code = request.COOKIES.get(settings.LANGUAGE_COOKIE_NAME)
try: try:
return get_supported_language_variant(lang_code, supported) return get_supported_language_variant(lang_code)
except LookupError: except LookupError:
pass pass
@ -494,29 +496,16 @@ def get_language_from_request(request, check_path=False):
if accept_lang == '*': if accept_lang == '*':
break break
# 'normalized' is the root name of the locale in POSIX format (which is if not language_code_re.search(accept_lang):
# the format used for the directories holding the MO files).
normalized = locale.locale_alias.get(to_locale(accept_lang, True))
if not normalized:
continue continue
# Remove the default encoding from locale_alias.
normalized = normalized.split('.')[0]
if normalized in _accepted:
# We've seen this locale before and have an MO file for it, so no
# need to check again.
return _accepted[normalized]
try: try:
accept_lang = get_supported_language_variant(accept_lang, supported) return get_supported_language_variant(accept_lang)
except LookupError: except LookupError:
continue continue
else:
_accepted[normalized] = accept_lang
return accept_lang
try: try:
return get_supported_language_variant(settings.LANGUAGE_CODE, supported) return get_supported_language_variant(settings.LANGUAGE_CODE)
except LookupError: except LookupError:
return settings.LANGUAGE_CODE return settings.LANGUAGE_CODE
@ -732,7 +721,7 @@ def parse_accept_lang_header(lang_string):
Any format errors in lang_string results in an empty list being returned. Any format errors in lang_string results in an empty list being returned.
""" """
result = [] result = []
pieces = accept_language_re.split(lang_string) pieces = accept_language_re.split(lang_string.lower())
if pieces[-1]: if pieces[-1]:
return [] return []
for i in range(0, len(pieces) - 1, 3): for i in range(0, len(pieces) - 1, 3):

View File

@ -1125,6 +1125,14 @@ Miscellaneous
For example, if you use multi-inheritance, you need to define custom primary For example, if you use multi-inheritance, you need to define custom primary
key fields on parent models, otherwise the default ``id`` fields will clash. key fields on parent models, otherwise the default ``id`` fields will clash.
* :meth:`~django.utils.translation.parse_accept_lang_header` now returns
lowercase locales instead of preserving the case in which they were provided.
As locales should be treated case-insensitively, this allows us to speed up
locale detection.
* :meth:`~django.utils.translation.get_language_from_path` and
:meth:`~django.utils.translation.trans_real.get_supported_language_variant`
no longer accept a ``supported`` argument.
.. _deprecated-features-1.7: .. _deprecated-features-1.7:
Features deprecated in 1.7 Features deprecated in 1.7

View File

@ -821,10 +821,10 @@ class MiscTests(TestCase):
p = trans_real.parse_accept_lang_header p = trans_real.parse_accept_lang_header
# Good headers. # Good headers.
self.assertEqual([('de', 1.0)], p('de')) self.assertEqual([('de', 1.0)], p('de'))
self.assertEqual([('en-AU', 1.0)], p('en-AU')) self.assertEqual([('en-au', 1.0)], p('en-AU'))
self.assertEqual([('es-419', 1.0)], p('es-419')) self.assertEqual([('es-419', 1.0)], p('es-419'))
self.assertEqual([('*', 1.0)], p('*;q=1.00')) self.assertEqual([('*', 1.0)], p('*;q=1.00'))
self.assertEqual([('en-AU', 0.123)], p('en-AU;q=0.123')) self.assertEqual([('en-au', 0.123)], p('en-AU;q=0.123'))
self.assertEqual([('en-au', 0.5)], p('en-au;q=0.5')) self.assertEqual([('en-au', 0.5)], p('en-au;q=0.5'))
self.assertEqual([('en-au', 1.0)], p('en-au;q=1.0')) self.assertEqual([('en-au', 1.0)], p('en-au;q=1.0'))
self.assertEqual([('da', 1.0), ('en', 0.5), ('en-gb', 0.25)], p('da, en-gb;q=0.25, en;q=0.5')) self.assertEqual([('da', 1.0), ('en', 0.5), ('en-gb', 0.25)], p('da, en-gb;q=0.25, en;q=0.5'))
@ -884,6 +884,24 @@ class MiscTests(TestCase):
r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-cn,de'} r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-cn,de'}
self.assertEqual(g(r), 'zh-cn') self.assertEqual(g(r), 'zh-cn')
r.META = {'HTTP_ACCEPT_LANGUAGE': 'NL'}
self.assertEqual('nl', g(r))
r.META = {'HTTP_ACCEPT_LANGUAGE': 'fy'}
self.assertEqual('fy', g(r))
r.META = {'HTTP_ACCEPT_LANGUAGE': 'ia'}
self.assertEqual('ia', g(r))
r.META = {'HTTP_ACCEPT_LANGUAGE': 'sr-latn'}
self.assertEqual('sr-latn', g(r))
r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-hans'}
self.assertEqual('zh-hans', g(r))
r.META = {'HTTP_ACCEPT_LANGUAGE': 'zh-hant'}
self.assertEqual('zh-hant', g(r))
@override_settings( @override_settings(
LANGUAGES=( LANGUAGES=(
('en', 'English'), ('en', 'English'),