From a63a83e5d88cd1696d1c40e89f254f69116c6800 Mon Sep 17 00:00:00 2001
From: Malcolm Tredinnick <malcolm.tredinnick@gmail.com>
Date: Sun, 31 Aug 2008 11:11:20 +0000
Subject: [PATCH] A rewrite of the reverse URL parsing: the reverse() call and
 the "url" template tag.

This is fully backwards compatible, but it fixes a bunch of little bugs. Thanks
to SmileyChris and Ilya Semenov for some early patches in this area that were
incorporated into this change.

Fixed #2977, #4915, #6934, #7206.


git-svn-id: http://code.djangoproject.com/svn/django/trunk@8760 bcc190cf-cafb-0310-a4f2-bffc1f526a37
---
 django/core/urlresolvers.py                   | 115 ++-----
 django/utils/regex_helper.py                  | 323 ++++++++++++++++++
 docs/topics/http/urls.txt                     |   7 +
 tests/regressiontests/templates/tests.py      |   3 +-
 .../urlpatterns_reverse/included_urls.py      |   8 +
 .../urlpatterns_reverse/tests.py              |  93 +++--
 .../urlpatterns_reverse/urls.py               |  46 +++
 .../urlpatterns_reverse/views.py              |   2 +
 8 files changed, 482 insertions(+), 115 deletions(-)
 create mode 100644 django/utils/regex_helper.py
 create mode 100644 tests/regressiontests/urlpatterns_reverse/included_urls.py
 create mode 100644 tests/regressiontests/urlpatterns_reverse/urls.py
 create mode 100644 tests/regressiontests/urlpatterns_reverse/views.py
diff --git a/django/core/urlresolvers.py b/django/core/urlresolvers.py
index c861c703a3..adf89dac48 100644
--- a/django/core/urlresolvers.py
+++ b/django/core/urlresolvers.py
@@ -13,12 +13,14 @@ from django.http import Http404
 from django.core.exceptions import ImproperlyConfigured, ViewDoesNotExist
 from django.utils.encoding import iri_to_uri, force_unicode, smart_str
 from django.utils.functional import memoize
+from django.utils.regex_helper import normalize
 from django.utils.thread_support import currentThread
 
 try:
     reversed
 except NameError:
     from django.utils.itercompat import reversed     # Python 2.3 fallback
+    from sets import Set as set
 
 _resolver_cache = {} # Maps urlconf modules to RegexURLResolver instances.
 _callable_cache = {} # Maps view and url pattern names to their view functions.
@@ -78,66 +80,6 @@ def get_mod_func(callback):
         return callback, ''
     return callback[:dot], callback[dot+1:]
 
-def reverse_helper(regex, *args, **kwargs):
-    """
-    Does a "reverse" lookup -- returns the URL for the given args/kwargs.
-    The args/kwargs are applied to the given compiled regular expression.
-    For example:
-
-        >>> reverse_helper(re.compile('^places/(\d+)/$'), 3)
-        'places/3/'
-        >>> reverse_helper(re.compile('^places/(?P<id>\d+)/$'), id=3)
-        'places/3/'
-        >>> reverse_helper(re.compile('^people/(?P<state>\w\w)/(\w+)/$'), 'adrian', state='il')
-        'people/il/adrian/'
-
-    Raises NoReverseMatch if the args/kwargs aren't valid for the regex.
-    """
-    # TODO: Handle nested parenthesis in the following regex.
-    result = re.sub(r'\(([^)]+)\)', MatchChecker(args, kwargs), regex.pattern)
-    return result.replace('^', '').replace('$', '').replace('\\', '')
-
-class MatchChecker(object):
-    "Class used in reverse RegexURLPattern lookup."
-    def __init__(self, args, kwargs):
-        self.args, self.kwargs = args, kwargs
-        self.current_arg = 0
-
-    def __call__(self, match_obj):
-        # match_obj.group(1) is the contents of the parenthesis.
-        # First we need to figure out whether it's a named or unnamed group.
-        #
-        grouped = match_obj.group(1)
-        m = re.search(r'^\?P<(\w+)>(.*?)$', grouped, re.UNICODE)
-        if m: # If this was a named group...
-            # m.group(1) is the name of the group
-            # m.group(2) is the regex.
-            try:
-                value = self.kwargs[m.group(1)]
-            except KeyError:
-                # It was a named group, but the arg was passed in as a
-                # positional arg or not at all.
-                try:
-                    value = self.args[self.current_arg]
-                    self.current_arg += 1
-                except IndexError:
-                    # The arg wasn't passed in.
-                    raise NoReverseMatch('Not enough positional arguments passed in')
-            test_regex = m.group(2)
-        else: # Otherwise, this was a positional (unnamed) group.
-            try:
-                value = self.args[self.current_arg]
-                self.current_arg += 1
-            except IndexError:
-                # The arg wasn't passed in.
-                raise NoReverseMatch('Not enough positional arguments passed in')
-            test_regex = grouped
-        # Note we're using re.match here on purpose because the start of
-        # to string needs to match.
-        if not re.match(test_regex + '$', force_unicode(value), re.UNICODE):
-            raise NoReverseMatch("Value %r didn't match regular expression %r" % (value, test_regex))
-        return force_unicode(value)
-
 class RegexURLPattern(object):
     def __init__(self, regex, callback, default_args=None, name=None):
         # regex is a string representing a regular expression.
@@ -194,21 +136,6 @@ class RegexURLPattern(object):
         return self._callback
     callback = property(_get_callback)
 
-    def reverse(self, viewname, *args, **kwargs):
-        mod_name, func_name = get_mod_func(viewname)
-        try:
-            lookup_view = getattr(__import__(mod_name, {}, {}, ['']), func_name)
-        except ImportError, e:
-            raise NoReverseMatch("Could not import '%s': %s" % (mod_name, e))
-        except AttributeError, e:
-            raise NoReverseMatch("'%s' has no attribute '%s'" % (mod_name, func_name))
-        if lookup_view != self.callback:
-            raise NoReverseMatch("Reversed view '%s' doesn't match the expected callback ('%s')." % (viewname, self.callback))
-        return self.reverse_helper(*args, **kwargs)
-
-    def reverse_helper(self, *args, **kwargs):
-        return reverse_helper(self.regex, *args, **kwargs)
-
 class RegexURLResolver(object):
     def __init__(self, regex, urlconf_name, default_kwargs=None):
         # regex is a string representing a regular expression.
@@ -225,12 +152,21 @@ class RegexURLResolver(object):
     def _get_reverse_dict(self):
         if not self._reverse_dict and hasattr(self.urlconf_module, 'urlpatterns'):
             for pattern in reversed(self.urlconf_module.urlpatterns):
+                p_pattern = pattern.regex.pattern
+                if p_pattern.startswith('^'):
+                    p_pattern = p_pattern[1:]
                 if isinstance(pattern, RegexURLResolver):
-                    for key, value in pattern.reverse_dict.iteritems():
-                        self._reverse_dict[key] = (pattern,) + value
+                    parent = normalize(pattern.regex.pattern)
+                    for name, (matches, pat) in pattern.reverse_dict.iteritems():
+                        new_matches = []
+                        for piece, p_args in parent:
+                            new_matches.extend([(piece + suffix, p_args + args)
+                                    for (suffix, args) in matches])
+                        self._reverse_dict[name] = new_matches, p_pattern + pat
                 else:
-                    self._reverse_dict[pattern.callback] = (pattern,)
-                    self._reverse_dict[pattern.name] = (pattern,)
+                    bits = normalize(p_pattern)
+                    self._reverse_dict[pattern.callback] = bits, p_pattern
+                    self._reverse_dict[pattern.name] = bits, p_pattern
         return self._reverse_dict
     reverse_dict = property(_get_reverse_dict)
 
@@ -281,20 +217,27 @@ class RegexURLResolver(object):
         return self._resolve_special('500')
 
     def reverse(self, lookup_view, *args, **kwargs):
+        if args and kwargs:
+            raise ValueError("Don't mix *args and **kwargs in call to reverse()!")
         try:
             lookup_view = get_callable(lookup_view, True)
         except (ImportError, AttributeError), e:
             raise NoReverseMatch("Error importing '%s': %s." % (lookup_view, e))
-        if lookup_view in self.reverse_dict:
-            return u''.join([reverse_helper(part.regex, *args, **kwargs) for part in self.reverse_dict[lookup_view]])
+        possibilities, pattern = self.reverse_dict.get(lookup_view, [(), ()])
+        for result, params in possibilities:
+            if args:
+                if len(args) != len(params):
+                    continue
+                candidate =  result % dict(zip(params, args))
+            else:
+                if set(kwargs.keys()) != set(params):
+                    continue
+                candidate = result % kwargs
+            if re.search('^%s' % pattern, candidate, re.UNICODE):
+                return candidate
         raise NoReverseMatch("Reverse for '%s' with arguments '%s' and keyword "
                 "arguments '%s' not found." % (lookup_view, args, kwargs))
 
-    def reverse_helper(self, lookup_view, *args, **kwargs):
-        sub_match = self.reverse(lookup_view, *args, **kwargs)
-        result = reverse_helper(self.regex, *args, **kwargs)
-        return result + sub_match
-
 def resolve(path, urlconf=None):
     return get_resolver(urlconf).resolve(path)
 
diff --git a/django/utils/regex_helper.py b/django/utils/regex_helper.py
new file mode 100644
index 0000000000..3e3d349433
--- /dev/null
+++ b/django/utils/regex_helper.py
@@ -0,0 +1,323 @@
+"""
+Functions for reversing a regular expression (used in reverse URL resolving).
+Used internally by Django and not intended for external use.
+
+This is not, and is not intended to be, a complete reg-exp decompiler. It
+should be good enough for a large class of URLS, however.
+"""
+
+# Mapping of an escape character to a representative of that class. So, e.g.,
+# "\w" is replaced by "x" in a reverse URL. A value of None means to ignore
+# this sequence. Any missing key is mapped to itself.
+ESCAPE_MAPPINGS = {
+    "A": None,
+    "b": None,
+    "B": None,
+    "d": '0',
+    "D": "x",
+    "s": " ",
+    "S": "x",
+    "w": "x",
+    "W": "!",
+    "Z": None,
+}
+
+class Choice(list):
+    """
+    Used to represent multiple possibilities at this point in a pattern string.
+    We use a distinguished type, rather than a list, so that the usage in the
+    code is clear.
+    """
+
+class Group(list):
+    """
+    Used to represent a capturing group in the pattern string.
+    """
+
+class NonCapture(list):
+    """
+    Used to represent a non-capturing group in the pattern string.
+    """
+
+def normalize(pattern):
+    """
+    Given a reg-exp pattern, normalizes it to a list of forms that suffice for
+    reverse matching. This does the following:
+
+    (1) For any repeating sections, keeps the minimum number of occurrences
+        permitted (this means zero for optional groups).
+    (2) If an optional group includes parameters, include one occurrence of
+        that group (along with the zero occurrence case from step (1)).
+    (3) Select the first (essentially an arbitrary) element from any character
+        class. Select an arbitrary character for any unordered class (e.g. '.'
+        or '\w') in the pattern.
+    (5) Ignore comments and any of the reg-exp flags that won't change
+        what we construct ("iLmsu"). "(?x)" is an error, however.
+    (6) Raise an error on all other non-capturing (?...) forms (e.g.
+        look-ahead and look-behind matches) and any disjunctive ('|')
+        constructs.
+
+    Django's URLs for forward resolving are either all positional arguments or
+    all keyword arguments. That is assumed here, as well. Although reverse
+    resolving can be done using positional args when keyword args are
+    specified, the two cannot be mixed in the same reverse() call.
+    """
+    # Do a linear scan to work out the special features of this pattern. The
+    # idea is that we scan once here and collect all the information we need to
+    # make future decisions.
+    result = []
+    non_capturing_groups = []
+    consume_next = True
+    pattern_iter = next_char(iter(pattern))
+    num_args = 0
+
+    # A "while" loop is used here because later on we need to be able to peek
+    # at the next character and possibly go around without consuming another
+    # one at the top of the loop.
+    ch, escaped = pattern_iter.next()
+    try:
+        while True:
+            if escaped:
+                result.append(ch)
+            elif ch == '.':
+                # Replace "any character" with an arbitrary representative.
+                result.append("x")
+            elif ch == '|':
+                # FIXME: One day we'll should do this, but not in 1.0.
+                raise NotImplementedError
+            elif ch == "^":
+                pass
+            elif ch == '$':
+                break
+            elif ch == ')':
+                # This can only be the end of a non-capturing group, since all
+                # other unescaped parentheses are handled by the grouping
+                # section later (and the full group is handled there).
+                #
+                # We regroup everything inside the capturing group so that it
+                # can be quantified, if necessary.
+                start = non_capturing_groups.pop()
+                inner = NonCapture(result[start:])
+                result = result[:start] + [inner]
+            elif ch == '[':
+                # Replace ranges with the first character in the range.
+                ch, escaped = pattern_iter.next()
+                result.append(ch)
+                ch, escaped = pattern_iter.next()
+                while escaped or ch != ']':
+                    ch, escaped = pattern_iter.next()
+            elif ch == '(':
+                # Some kind of group.
+                ch, escaped = pattern_iter.next()
+                if ch != '?' or escaped:
+                    # A positional group
+                    name = "_%d" % num_args
+                    num_args += 1
+                    result.append(Group((("%%(%s)s" % name), name)))
+                    walk_to_end(ch, pattern_iter)
+                else:
+                    ch, escaped = pattern_iter.next()
+                    if ch in "iLmsu#":
+                        # All of these are ignorable. Walk to the end of the
+                        # group.
+                        walk_to_end(ch, pattern_iter)
+                    elif ch == ':':
+                        # Non-capturing group
+                        non_capturing_groups.append(len(result))
+                    elif ch != 'P':
+                        # Anything else, other than a named group, is something
+                        # we cannot reverse.
+                        raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch)
+                    else:
+                        ch, escaped = pattern_iter.next()
+                        if ch != '<':
+                            raise ValueError("Non-reversible reg-exp portion: '(?P%s'" % ch)
+                        # We are in a named capturing group. Extra the name and
+                        # then skip to the end.
+                        name = []
+                        ch, escaped = pattern_iter.next()
+                        while ch != '>':
+                            name.append(ch)
+                            ch, escaped = pattern_iter.next()
+                        param = ''.join(name)
+                        result.append(Group((("%%(%s)s" % param), param)))
+                        walk_to_end(ch, pattern_iter)
+            elif ch in "*?+{":
+                # Quanitifers affect the previous item in the result list.
+                count, ch = get_quantifier(ch, pattern_iter)
+                if ch:
+                    # We had to look ahead, but it wasn't need to compute the
+                    # quanitifer, so use this character next time around the
+                    # main loop.
+                    consume_next = False
+
+                if count == 0:
+                    if contains(result[-1], Group):
+                        # If we are quantifying a capturing group (or
+                        # something containing such a group) and the minimum is
+                        # zero, we must also handle the case of one occurrence
+                        # being present. All the quantifiers (except {0,0},
+                        # which we conveniently ignore) that have a 0 minimum
+                        # also allow a single occurrence.
+                        result[-1] = Choice([None, result[-1]])
+                    else:
+                        result.pop()
+                elif count > 1:
+                    result.extend([result[-1]] * (count - 1))
+            else:
+                # Anything else is a literal.
+                result.append(ch)
+
+            if consume_next:
+                ch, escaped = pattern_iter.next()
+            else:
+                consume_next = True
+    except StopIteration:
+        pass
+    except NotImplementedError:
+        # A case of using the disjunctive form. No results for you!
+        return zip([''],  [[]])
+
+    return zip(*flatten_result(result))
+
+def next_char(input_iter):
+    """
+    An iterator that yields the next character from "pattern_iter", respecting
+    escape sequences. An escaped character is replaced by a representative of
+    its class (e.g. \w -> "x"). If the escaped character is one that is
+    skipped, it is not returned (the next character is returned instead).
+
+    Yields the next character, along with a boolean indicating whether it is a
+    raw (unescaped) character or not.
+    """
+    for ch in input_iter:
+        if ch != '\\':
+            yield ch, False
+            continue
+        ch = input_iter.next()
+        representative = ESCAPE_MAPPINGS.get(ch, ch)
+        if representative is None:
+            continue
+        yield representative, True
+
+def walk_to_end(ch, input_iter):
+    """
+    The iterator is currently inside a capturing group. We want to walk to the
+    close of this group, skipping over any nested groups and handling escaped
+    parentheses correctly.
+    """
+    if ch == '(':
+        nesting = 1
+    else:
+        nesting = 0
+    for ch, escaped in input_iter:
+        if escaped:
+            continue
+        elif ch == '(':
+            nesting += 1
+        elif ch == ')':
+            if not nesting:
+                return
+            nesting -= 1
+
+def get_quantifier(ch, input_iter):
+    """
+    Parse a quantifier from the input, where "ch" is the first character in the
+    quantifier.
+
+    Returns the minimum number of occurences permitted by the quantifier and
+    either None or the next character from the input_iter if the next character
+    is not part of the quantifier.
+    """
+    if ch in '*?+':
+        try:
+            ch2, escaped = input_iter.next()
+        except StopIteration:
+            ch2 = None
+        if ch2 == '?':
+            ch2 = None
+        if ch == '+':
+            return 1, ch2
+        return 0, ch2
+
+    quant = []
+    while ch != '}':
+        ch, escaped = input_iter.next()
+        quant.append(ch)
+    values = ''.join(quant).split(',')
+
+    # Consume the trailing '?', if necessary.
+    try:
+        ch, escaped = input_iter.next()
+    except StopIteration:
+        ch = None
+    if ch == '?':
+        ch = None
+    return int(values[0]), ch
+
+def contains(source, inst):
+    """
+    Returns True if the "source" contains an instance of "inst". False,
+    otherwise.
+    """
+    if isinstance(source, inst):
+        return True
+    if isinstance(source, NonCapture):
+        for elt in source:
+            if contains(elt, inst):
+                return True
+    return False
+
+def flatten_result(source):
+    """
+    Turns the given source sequence into a list of reg-exp possibilities and
+    their arguments. Returns a list of strings and a list of argument lists.
+    Each of the two lists will be of the same length.
+    """
+    if source is None:
+        return [''], [[]]
+    if isinstance(source, Group):
+        if source[1] is None:
+            params = []
+        else:
+            params = [source[1]]
+        return [source[0]], [params]
+    result = ['']
+    result_args = [[]]
+    pos = last = 0
+    for pos, elt in enumerate(source):
+        if isinstance(elt, basestring):
+            continue
+        piece = ''.join(source[last:pos])
+        if isinstance(elt, Group):
+            piece += elt[0]
+            param = elt[1]
+        else:
+            param = None
+        last = pos + 1
+        for i in range(len(result)):
+            result[i] += piece
+            if param:
+                result_args[i].append(param)
+        if isinstance(elt, (Choice, NonCapture)):
+            if isinstance(elt, NonCapture):
+                elt = [elt]
+            inner_result, inner_args = [], []
+            for item in elt:
+                res, args = flatten_result(item)
+                inner_result.extend(res)
+                inner_args.extend(args)
+            new_result = []
+            new_args = []
+            for item, args in zip(result, result_args):
+                for i_item, i_args in zip(inner_result, inner_args):
+                    new_result.append(item + i_item)
+                    new_args.append(args[:] + i_args)
+            result = new_result
+            result_args = new_args
+    if pos >= last:
+        piece = ''.join(source[last:])
+        for i in range(len(result)):
+            result[i] += piece
+    return result, result_args
+
diff --git a/docs/topics/http/urls.txt b/docs/topics/http/urls.txt
index 1027772515..c8564a3bb8 100644
--- a/docs/topics/http/urls.txt
+++ b/docs/topics/http/urls.txt
@@ -612,6 +612,13 @@ arguments to use in the URL matching. For example::
 
 .. _URL pattern name: `Naming URL patterns`_
 
+The ``reverse()`` function can reverse a large variety of regular expression
+patterns for URLs, but not every possible one. The main restriction at the
+moment is that the pattern cannot contain alternative choices using the
+vertical bar (``"|"``) character. You can quite happily use such patterns for
+matching against incoming URLs and sending them off to views, but you cannot
+reverse such patterns.
+
 permalink()
 -----------
 
diff --git a/tests/regressiontests/templates/tests.py b/tests/regressiontests/templates/tests.py
index 187eb43cf4..e779cea607 100644
--- a/tests/regressiontests/templates/tests.py
+++ b/tests/regressiontests/templates/tests.py
@@ -886,7 +886,8 @@ class Templates(unittest.TestCase):
             ### URL TAG ########################################################
             # Successes
             'url01': ('{% url regressiontests.templates.views.client client.id %}', {'client': {'id': 1}}, '/url_tag/client/1/'),
-            'url02': ('{% url regressiontests.templates.views.client_action client.id, action="update" %}', {'client': {'id': 1}}, '/url_tag/client/1/update/'),
+            'url02': ('{% url regressiontests.templates.views.client_action id=client.id,action="update" %}', {'client': {'id': 1}}, '/url_tag/client/1/update/'),
+            'url02a': ('{% url regressiontests.templates.views.client_action client.id,"update" %}', {'client': {'id': 1}}, '/url_tag/client/1/update/'),
             'url03': ('{% url regressiontests.templates.views.index %}', {}, '/url_tag/'),
             'url04': ('{% url named.client client.id %}', {'client': {'id': 1}}, '/url_tag/named-client/1/'),
             'url05': (u'{% url метка_оператора v %}', {'v': u'Ω'}, '/url_tag/%D0%AE%D0%BD%D0%B8%D0%BA%D0%BE%D0%B4/%CE%A9/'),
diff --git a/tests/regressiontests/urlpatterns_reverse/included_urls.py b/tests/regressiontests/urlpatterns_reverse/included_urls.py
new file mode 100644
index 0000000000..f8acf342f2
--- /dev/null
+++ b/tests/regressiontests/urlpatterns_reverse/included_urls.py
@@ -0,0 +1,8 @@
+from django.conf.urls.defaults import *
+from views import empty_view
+
+urlpatterns = patterns('',
+    url(r'^$', empty_view, name="inner-nothing"),
+    url(r'^extra/(?P<extra>\w+)/$', empty_view, name="inner-extra"),
+    url(r'^(?P<one>\d+)|(?P<two>\d+)/$', empty_view, name="inner-disjunction"),
+)
diff --git a/tests/regressiontests/urlpatterns_reverse/tests.py b/tests/regressiontests/urlpatterns_reverse/tests.py
index 610c3f7bc7..fbea3cb570 100644
--- a/tests/regressiontests/urlpatterns_reverse/tests.py
+++ b/tests/regressiontests/urlpatterns_reverse/tests.py
@@ -1,40 +1,77 @@
-"Unit tests for reverse URL lookup"
+"""
+Unit tests for reverse URL lookups.
+"""
 
-from django.core.urlresolvers import reverse_helper, NoReverseMatch
-import re, unittest
+from django.core.urlresolvers import reverse, NoReverseMatch
+from django.test import TestCase
 
 test_data = (
-    ('^places/(\d+)/$', 'places/3/', [3], {}),
-    ('^places/(\d+)/$', 'places/3/', ['3'], {}),
-    ('^places/(\d+)/$', NoReverseMatch, ['a'], {}),
-    ('^places/(\d+)/$', NoReverseMatch, [], {}),
-    ('^places/(?P<id>\d+)/$', 'places/3/', [], {'id': 3}),
-    ('^people/(?P<name>\w+)/$', 'people/adrian/', ['adrian'], {}),
-    ('^people/(?P<name>\w+)/$', 'people/adrian/', [], {'name': 'adrian'}),
-    ('^people/(?P<name>\w+)/$', NoReverseMatch, ['name with spaces'], {}),
-    ('^people/(?P<name>\w+)/$', NoReverseMatch, [], {'name': 'name with spaces'}),
-    ('^people/(?P<name>\w+)/$', NoReverseMatch, [], {}),
-    ('^hardcoded/$', 'hardcoded/', [], {}),
-    ('^hardcoded/$', 'hardcoded/', ['any arg'], {}),
-    ('^hardcoded/$', 'hardcoded/', [], {'kwarg': 'foo'}),
-    ('^hardcoded/doc\\.pdf$', 'hardcoded/doc.pdf', [], {}),
-    ('^people/(?P<state>\w\w)/(?P<name>\w+)/$', 'people/il/adrian/', [], {'state': 'il', 'name': 'adrian'}),
-    ('^people/(?P<state>\w\w)/(?P<name>\d)/$', NoReverseMatch, [], {'state': 'il', 'name': 'adrian'}),
-    ('^people/(?P<state>\w\w)/(?P<name>\w+)/$', NoReverseMatch, [], {'state': 'il'}),
-    ('^people/(?P<state>\w\w)/(?P<name>\w+)/$', NoReverseMatch, [], {'name': 'adrian'}),
-    ('^people/(?P<state>\w\w)/(\w+)/$', NoReverseMatch, ['il'], {'name': 'adrian'}),
-    ('^people/(?P<state>\w\w)/(\w+)/$', 'people/il/adrian/', ['adrian'], {'state': 'il'}),
+    ('places', '/places/3/', [3], {}),
+    ('places', '/places/3/', ['3'], {}),
+    ('places', NoReverseMatch, ['a'], {}),
+    ('places', NoReverseMatch, [], {}),
+    ('places?', '/place/', [], {}),
+    ('places+', '/places/', [], {}),
+    ('places*', '/place/', [], {}),
+    ('places2?', '/', [], {}),
+    ('places2+', '/places/', [], {}),
+    ('places2*', '/', [], {}),
+    ('places3', '/places/4/', [4], {}),
+    ('places3', '/places/harlem/', ['harlem'], {}),
+    ('places3', NoReverseMatch, ['harlem64'], {}),
+    ('places4', '/places/3/', [], {'id': 3}),
+    ('people', NoReverseMatch, [], {}),
+    ('people', '/people/adrian/', ['adrian'], {}),
+    ('people', '/people/adrian/', [], {'name': 'adrian'}),
+    ('people', NoReverseMatch, ['name with spaces'], {}),
+    ('people', NoReverseMatch, [], {'name': 'name with spaces'}),
+    ('people2', '/people/name/', [], {}),
+    ('people2a', '/people/name/fred/', ['fred'], {}),
+    ('optional', '/optional/fred/', [], {'name': 'fred'}),
+    ('optional', '/optional/fred/', ['fred'], {}),
+    ('hardcoded', '/hardcoded/', [], {}),
+    ('hardcoded2', '/hardcoded/doc.pdf', [], {}),
+    ('people3', '/people/il/adrian/', [], {'state': 'il', 'name': 'adrian'}),
+    ('people3', NoReverseMatch, [], {'state': 'il'}),
+    ('people3', NoReverseMatch, [], {'name': 'adrian'}),
+    ('people4', NoReverseMatch, [], {'state': 'il', 'name': 'adrian'}),
+    ('people6', '/people/il/test/adrian/', ['il/test', 'adrian'], {}),
+    ('people6', '/people//adrian/', ['adrian'], {}),
+    ('range', '/character_set/a/', [], {}),
+    ('range2', '/character_set/x/', [], {}),
+    ('price', '/price/$10/', ['10'], {}),
+    ('price2', '/price/$10/', ['10'], {}),
+    ('price3', '/price/$10/', ['10'], {}),
+    ('product', '/product/chocolate+($2.00)/', [], {'price': '2.00', 'product': 'chocolate'}),
+    ('headlines', '/headlines/2007.5.21/', [], dict(year=2007, month=5, day=21)),
+    ('windows', r'/windows_path/C:%5CDocuments%20and%20Settings%5Cspam/', [], dict(drive_name='C', path=r'Documents and Settings\spam')),
+    ('special', r'/special_chars/+%5C$*/', [r'+\$*'], {}),
+    ('special', NoReverseMatch, [''], {}),
+    ('mixed', '/john/0/', [], {'name': 'john'}),
+    ('repeats', '/repeats/a/', [], {}),
+    ('repeats2', '/repeats/aa/', [], {}),
+    ('insensitive', '/CaseInsensitive/fred', ['fred'], {}),
+    ('test', '/test/1', [], {}),
+    ('test2', '/test/2', [], {}),
+    ('inner-nothing', '/outer/42/', [], {'outer': '42'}),
+    ('inner-nothing', '/outer/42/', ['42'], {}),
+    ('inner-nothing', NoReverseMatch, ['foo'], {}),
+    ('inner-extra', '/outer/42/extra/inner/', [], {'extra': 'inner', 'outer': '42'}),
+    ('inner-extra', '/outer/42/extra/inner/', ['42', 'inner'], {}),
+    ('inner-extra', NoReverseMatch, ['fred', 'inner'], {}),
+    ('disjunction', NoReverseMatch, ['foo'], {}),
+    ('inner-disjunction', NoReverseMatch, ['10', '11'], {}),
 )
 
-class URLPatternReverse(unittest.TestCase):
+class URLPatternReverse(TestCase):
+    urls = 'regressiontests.urlpatterns_reverse.urls'
+
     def test_urlpattern_reverse(self):
-        for regex, expected, args, kwargs in test_data:
+        for name, expected, args, kwargs in test_data:
             try:
-                got = reverse_helper(re.compile(regex), *args, **kwargs)
+                got = reverse(name, args=args, kwargs=kwargs)
             except NoReverseMatch, e:
                 self.assertEqual(expected, NoReverseMatch)
             else:
                 self.assertEquals(got, expected)
 
-if __name__ == "__main__":
-    run_tests(1)
diff --git a/tests/regressiontests/urlpatterns_reverse/urls.py b/tests/regressiontests/urlpatterns_reverse/urls.py
new file mode 100644
index 0000000000..21f2ddafb7
--- /dev/null
+++ b/tests/regressiontests/urlpatterns_reverse/urls.py
@@ -0,0 +1,46 @@
+from django.conf.urls.defaults import *
+from views import empty_view
+
+urlpatterns = patterns('',
+    url(r'^places/(\d+)/$', empty_view, name='places'),
+    url(r'^places?/$', empty_view, name="places?"),
+    url(r'^places+/$', empty_view, name="places+"),
+    url(r'^places*/$', empty_view, name="places*"),
+    url(r'^(?:places/)?$', empty_view, name="places2?"),
+    url(r'^(?:places/)+$', empty_view, name="places2+"),
+    url(r'^(?:places/)*$', empty_view, name="places2*"),
+    url(r'^places/(\d+|[a-z_]+)/', empty_view, name="places3"),
+    url(r'^places/(?P<id>\d+)/$', empty_view, name="places4"),
+    url(r'^people/(?P<name>\w+)/$', empty_view, name="people"),
+    url(r'^people/(?:name/)', empty_view, name="people2"),
+    url(r'^people/(?:name/(\w+)/)?', empty_view, name="people2a"),
+    url(r'^optional/(?P<name>.*)/(?:.+/)?', empty_view, name="optional"),
+    url(r'^hardcoded/$', 'hardcoded/', empty_view, name="hardcoded"),
+    url(r'^hardcoded/doc\.pdf$', empty_view, name="hardcoded2"),
+    url(r'^people/(?P<state>\w\w)/(?P<name>\w+)/$', empty_view, name="people3"),
+    url(r'^people/(?P<state>\w\w)/(?P<name>\d)/$', empty_view, name="people4"),
+    url(r'^people/((?P<state>\w\w)/test)?/(\w+)/$', empty_view, name="people6"),
+    url(r'^character_set/[abcdef0-9]/$', empty_view, name="range"),
+    url(r'^character_set/[\w]/$', empty_view, name="range2"),
+    url(r'^price/\$(\d+)/$', empty_view, name="price"),
+    url(r'^price/[$](\d+)/$', empty_view, name="price2"),
+    url(r'^price/[\$](\d+)/$', empty_view, name="price3"),
+    url(r'^product/(?P<product>\w+)\+\(\$(?P<price>\d+(\.\d+)?)\)/$',
+            empty_view, name="product"),
+    url(r'^headlines/(?P<year>\d+)\.(?P<month>\d+)\.(?P<day>\d+)/$', empty_view,
+            name="headlines"),
+    url(r'^windows_path/(?P<drive_name>[A-Z]):\\(?P<path>.+)/$', empty_view,
+            name="windows"),
+    url(r'^special_chars/(.+)/$', empty_view, name="special"),
+    url(r'^(?P<name>.+)/\d+/$', empty_view, name="mixed"),
+    url(r'^repeats/a{1,2}/$', empty_view, name="repeats"),
+    url(r'^repeats/a{2,4}/$', empty_view, name="repeats2"),
+    url(r'^(?i)CaseInsensitive/(\w+)', empty_view, name="insensitive"),
+    url(r'^test/1/?', empty_view, name="test"),
+    url(r'^(?i)test/2/?$', empty_view, name="test2"),
+    url(r'^outer/(?P<outer>\d+)/',
+            include('regressiontests.urlpatterns_reverse.included_urls')),
+
+    # This is non-reversible, but we shouldn't blow up when parsing it.
+    url(r'^(?:foo|bar)(\w+)/$', empty_view, name="disjunction"),
+)
diff --git a/tests/regressiontests/urlpatterns_reverse/views.py b/tests/regressiontests/urlpatterns_reverse/views.py
new file mode 100644
index 0000000000..65be705023
--- /dev/null
+++ b/tests/regressiontests/urlpatterns_reverse/views.py
@@ -0,0 +1,2 @@
+def empty_view(request, *args, **kwargs):
+    pass