From 26e3e7ecb5e4c9af4cd5aa178f65ce1585d3ae07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Langa?= <lukasz@langa.pl>
Date: Sun, 19 May 2013 12:43:34 +0200
Subject: [PATCH] Fixed #11915: generic Accept-Language matches
 country-specific variants

---
 django/middleware/locale.py            |  3 +-
 django/utils/translation/trans_real.py | 50 ++++++++++++++++----------
 tests/i18n/__init__.py                 |  1 +
 tests/i18n/tests.py                    | 15 +++++++-
 4 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/django/middleware/locale.py b/django/middleware/locale.py
index 9b2ef8ff32..25fbbaccdd 100644
--- a/django/middleware/locale.py
+++ b/django/middleware/locale.py
@@ -6,6 +6,7 @@ from django.core.urlresolvers import (is_valid_path, get_resolver,
 from django.http import HttpResponseRedirect
 from django.utils.cache import patch_vary_headers
 from django.utils import translation
+from django.utils.datastructures import SortedDict
 
 
 class LocaleMiddleware(object):
@@ -18,7 +19,7 @@ class LocaleMiddleware(object):
     """
 
     def __init__(self):
-        self._supported_languages = dict(settings.LANGUAGES)
+        self._supported_languages = SortedDict(settings.LANGUAGES)
         self._is_language_prefix_patterns_used = False
         for url_pattern in get_resolver(None).url_patterns:
             if isinstance(url_pattern, LocaleRegexURLResolver):
diff --git a/django/utils/translation/trans_real.py b/django/utils/translation/trans_real.py
index 26be0ed729..0aaa4ec99b 100644
--- a/django/utils/translation/trans_real.py
+++ b/django/utils/translation/trans_real.py
@@ -10,7 +10,9 @@ from threading import local
 import warnings
 
 from django.utils.importlib import import_module
+from django.utils.datastructures import SortedDict
 from django.utils.encoding import force_str, force_text
+from django.utils.functional import memoize
 from django.utils._os import upath
 from django.utils.safestring import mark_safe, SafeData
 from django.utils import six
@@ -29,6 +31,7 @@ _default = None
 # This is a cache for normalized accept-header languages to prevent multiple
 # file lookups when checking the same locale on repeated requests.
 _accepted = {}
+_checked_languages = {}
 
 # magic gettext number to separate context from message
 CONTEXT_SEPARATOR = "\x04"
@@ -355,38 +358,54 @@ def check_for_language(lang_code):
         if gettext_module.find('django', path, [to_locale(lang_code)]) is not None:
             return True
     return False
+check_for_language = memoize(check_for_language, _checked_languages, 1)
 
-def get_supported_language_variant(lang_code, supported=None):
+def get_supported_language_variant(lang_code, supported=None, strict=False):
     """
     Returns the language-code that's listed in supported languages, possibly
     selecting a more generic variant. Raises LookupError if nothing found.
+
+    If `strict` is False (the default), the function also falls back to another
+    country-specific variant of the same language when no direct match is found.
     """
     if supported is None:
         from django.conf import settings
-        supported = dict(settings.LANGUAGES)
+        supported = SortedDict(settings.LANGUAGES)
     if lang_code:
-        # e.g. if fr-CA is not supported, try fr-ca;
-        #      if that fails, fallback to fr.
-        variants = (lang_code, lang_code.lower(), lang_code.split('-')[0],
-                    lang_code.lower().split('-')[0])
+        # if fr-CA is not supported, try fr-ca; if that fails, fallback to fr.
+        generic_lang_code = lang_code.split('-')[0]
+        variants = (lang_code, lang_code.lower(), generic_lang_code,
+                    generic_lang_code.lower())
         for code in variants:
             if code in supported and check_for_language(code):
                 return code
+        if not strict:
+            # if fr-fr is not supported, try fr-ca.
+            for supported_code in supported:
+                if supported_code.startswith((generic_lang_code + '-',
+                                              generic_lang_code.lower() + '-')):
+                    return supported_code
     raise LookupError(lang_code)
 
-def get_language_from_path(path, supported=None):
+def get_language_from_path(path, supported=None, strict=False):
     """
     Returns the language-code if there is a valid language-code
     found in the `path`.
+
+    If `strict` is False (the default), a supported country-specific variant of
+    the same language may be returned when the code in the path is not supported.
     """
     if supported is None:
         from django.conf import settings
-        supported = dict(settings.LANGUAGES)
+        supported = SortedDict(settings.LANGUAGES)
     regex_match = language_code_prefix_re.match(path)
-    if regex_match:
-        lang_code = regex_match.group(1)
-        if lang_code in supported and check_for_language(lang_code):
-            return lang_code
+    if not regex_match:
+        return None
+    lang_code = regex_match.group(1)
+    try:
+        return get_supported_language_variant(lang_code, supported, strict=strict)
+    except LookupError:
+        return None
 
 def get_language_from_request(request, check_path=False):
     """
@@ -400,7 +419,7 @@ def get_language_from_request(request, check_path=False):
     """
     global _accepted
     from django.conf import settings
-    supported = dict(settings.LANGUAGES)
+    supported = SortedDict(settings.LANGUAGES)
 
     if check_path:
         lang_code = get_language_from_path(request.path_info, supported)
@@ -424,11 +443,6 @@ def get_language_from_request(request, check_path=False):
         if accept_lang == '*':
             break
 
-        # We have a very restricted form for our language files (no encoding
-        # specifier, since they all must be UTF-8 and only one possible
-        # language each time. So we avoid the overhead of gettext.find() and
-        # work out the MO file manually.
-
         # 'normalized' is the root name of the locale in POSIX format (which is
         # the format used for the directories holding the MO files).
         normalized = locale.locale_alias.get(to_locale(accept_lang, True))
diff --git a/tests/i18n/__init__.py b/tests/i18n/__init__.py
index a3e9ce7053..c5aaa31fe3 100644
--- a/tests/i18n/__init__.py
+++ b/tests/i18n/__init__.py
@@ -11,6 +11,7 @@ class TransRealMixin(object):
         trans_real._active = local()
         trans_real._default = None
         trans_real._accepted = {}
+        trans_real._checked_languages = {}
 
     def tearDown(self):
         self.flush_caches()
diff --git a/tests/i18n/tests.py b/tests/i18n/tests.py
index 137270f830..9f1e366c9f 100644
--- a/tests/i18n/tests.py
+++ b/tests/i18n/tests.py
@@ -1157,6 +1157,7 @@ class LocaleMiddlewareTests(TransRealMixin, TestCase):
     LANGUAGES=(
         ('bg', 'Bulgarian'),
         ('en-us', 'English'),
+        ('pt-br', 'Portuguese (Brazil)'),
     ),
     MIDDLEWARE_CLASSES=(
         'django.middleware.locale.LocaleMiddleware',
@@ -1176,7 +1177,6 @@ class CountrySpecificLanguageTests(TransRealMixin, TestCase):
         self.assertTrue(check_for_language('en-us'))
         self.assertTrue(check_for_language('en-US'))
 
-
     def test_get_language_from_request(self):
         # issue 19919
         r = self.rf.get('/')
@@ -1189,3 +1189,16 @@ class CountrySpecificLanguageTests(TransRealMixin, TestCase):
         r.META = {'HTTP_ACCEPT_LANGUAGE': 'bg-bg,en-US;q=0.8,en;q=0.6,ru;q=0.4'}
         lang = get_language_from_request(r)
         self.assertEqual('bg', lang)
+
+    def test_specific_language_codes(self):
+        # issue 11915
+        r = self.rf.get('/')
+        r.COOKIES = {}
+        r.META = {'HTTP_ACCEPT_LANGUAGE': 'pt,en-US;q=0.8,en;q=0.6,ru;q=0.4'}
+        lang = get_language_from_request(r)
+        self.assertEqual('pt-br', lang)
+        r = self.rf.get('/')
+        r.COOKIES = {}
+        r.META = {'HTTP_ACCEPT_LANGUAGE': 'pt-pt,en-US;q=0.8,en;q=0.6,ru;q=0.4'}
+        lang = get_language_from_request(r)
+        self.assertEqual('pt-br', lang)