queryset-refactor: Changed the way order_by() and distinct() interact.

When using "select distinct" all ordering columns must be part of the output (select) columns. We were previously just throwing away ordering columns that weren't included, but there are some cases where they are needed and it's difficult to add them in manually. So now the default behaviour is to append any missing columns. This can affect the output of distinct() if complicated order_by() constructs are used, so the documentation has been updated with an explanation of what's going on there. Fixed #7070. git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7455 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2025-07-05 10:19:20 +00:00 · 2008-04-24 16:07:07 +00:00 · 2008-04-24 16:07:07 +00:00 · 8a4e1de8b0
commit 8a4e1de8b0
parent dd0c5855e9
4 changed files with 72 additions and 19 deletions
--- a/django/db/models/sql/query.py
+++ b/django/db/models/sql/query.py
@ -52,6 +52,7 @@ class Query(object):
        self.default_cols = True
        self.default_ordering = True
        self.standard_ordering = True
        self.ordering_aliases = []
        self.start_meta = None
        # SQL-related attributes
@ -139,6 +140,7 @@ class Query(object):
        obj.default_cols = self.default_cols
        obj.default_ordering = self.default_ordering
        obj.standard_ordering = self.standard_ordering
        obj.ordering_aliases = []
        obj.start_meta = self.start_meta
        obj.select = self.select[:]
        obj.tables = self.tables[:]
@ -215,16 +217,18 @@ class Query(object):
        self.pre_sql_setup()
        out_cols = self.get_columns()
        ordering = self.get_ordering()
        # This must come after 'select' and 'ordering' -- see docstring of
        # get_from_clause() for details.
        from_, f_params = self.get_from_clause()
        where, w_params = self.where.as_sql(qn=self.quote_name_unless_alias)
        params = list(self.extra_select_params)
        result = ['SELECT']
        if self.distinct:
            result.append('DISTINCT')
-        result.append(', '.join(out_cols))
+        result.append(', '.join(out_cols + self.ordering_aliases))
        result.append('FROM')
        result.extend(from_)
@ -465,15 +469,13 @@ class Query(object):
    def get_ordering(self):
        """
-        Returns a tuple representing the SQL elements in the "order by" clause.
+        Returns list representing the SQL elements in the "order by" clause.
        Also sets the ordering_aliases attribute on this instance to a list of
        extra aliases needed in the select.
        Determining the ordering SQL can change the tables we need to include,
        so this should be run *before* get_from_clause().
        """
        # FIXME: It's an SQL-92 requirement that all ordering columns appear as
        # output columns in the query (in the select statement) or be ordinals.
        # We don't enforce that here, but we should (by adding to the select
        # columns), for portability.
        if self.extra_order_by:
            ordering = self.extra_order_by
        elif not self.default_ordering:
@ -485,6 +487,7 @@ class Query(object):
        distinct = self.distinct
        select_aliases = self._select_aliases
        result = []
        ordering_aliases = []
        if self.standard_ordering:
            asc, desc = ORDER_DIR['ASC']
        else:
@ -515,13 +518,16 @@ class Query(object):
                for table, col, order in self.find_ordering_name(field,
                        self.model._meta, default_order=asc):
                    elt = '%s.%s' % (qn(table), qn2(col))
-                    if not distinct or elt in select_aliases:
+                    if distinct and elt not in select_aliases:
-                        result.append('%s %s' % (elt, order))
+                        ordering_aliases.append(elt)
                    result.append('%s %s' % (elt, order))
            else:
                col, order = get_order_dir(field, asc)
                elt = qn(col)
-                if not distinct or elt in select_aliases:
+                if distinct and elt not in select_aliases:
-                    result.append('%s %s' % (elt, order))
+                    ordering_aliases.append(elt)
                result.append('%s %s' % (elt, order))
        self.ordering_aliases = ordering_aliases
        return result
    def find_ordering_name(self, name, opts, alias=None, default_order='ASC',
@ -1379,8 +1385,14 @@ class Query(object):
        if not result_type:
            return cursor
        if result_type == SINGLE:
            if self.ordering_aliases:
                return cursor.fetchone()[:-len(results.ordering_aliases)]
            return cursor.fetchone()
        # The MULTI case.
        if self.ordering_aliases:
            return order_modified_iter(cursor, len(self.ordering_aliases),
                    self.connection.features.empty_fetchmany_value)
        return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)),
                self.connection.features.empty_fetchmany_value)
@ -1408,6 +1420,17 @@ def empty_iter():
    """
    yield iter([]).next()
 def order_modified_iter(cursor, trim, sentinel):
    """
    Yields blocks of rows from a cursor. We use this iterator in the special
    case when extra output columns have been added to support ordering
    requirements. We must trim those extra columns before anything else can use
    the results, since they're only needed to make the SQL valid.
    """
    for rows in iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)),
            sentinel):
        yield [r[:-trim] for r in rows]
 def setup_join_cache(sender):
    """
    The information needed to join between model fields is something that is
--- a/docs/db-api.txt
+++ b/docs/db-api.txt
@ -510,6 +510,10 @@ primary key if there is no ``Meta.ordering`` specified. For example::
 ...since the ``Blog`` model has no default ordering specified.
 Be cautious when ordering by fields in related models if you are also using
 ``distinct()``. See the note in the `distinct()`_ section for an explanation
 of how related model ordering can change the expected results.
 It is permissible to specify a multi-valued field to order the results by (for
 example, a ``ManyToMany`` field). Normally this won't be a sensible thing to
 do and it's really an advanced usage feature. However, if you know that your
@ -559,10 +563,28 @@ eliminates duplicate rows from the query results.
 By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
 is rarely a problem, because simple queries such as ``Blog.objects.all()``
-don't introduce the possibility of duplicate result rows.
+don't introduce the possibility of duplicate result rows. However, if your
 query spans multiple tables, it's possible to get duplicate results when a
 ``QuerySet`` is evaluated. That's when you'd use ``distinct()``.
-However, if your query spans multiple tables, it's possible to get duplicate
+.. note::
-results when a ``QuerySet`` is evaluated. That's when you'd use ``distinct()``.
+    Any fields used in an ``order_by()`` call are included in the SQL
    ``SELECT`` columns. This can sometimes lead to unexpected results when
    used in conjuntion with ``distinct()``. If you order by fields from a
    related model, those fields will be added to the selected columns and they
    may make otherwise duplicate rows appear to be distinct. Since the extra
    columns don't appear in the returned results (they are only there to
    support ordering), it sometimes looks like non-distinct results are being
    returned.
    Similarly, if you use a ``values()`` query to restrict the columns
    selected, the columns used in any ``order_by()`` (or default model
    ordering) will still be involved and may affect uniqueness of the results.
    The moral here is that if you are using ``distinct()`` be careful about
    ordering by related models. Similarly, when using ``distinct()`` and
    ``values()`` together, be careful when ordering by fields not in the
    ``values()`` call.
 ``values(*fields)``
 ~~~~~~~~~~~~~~~~~~~
@ -627,6 +649,9 @@ A couple of subtleties that are worth mentioning:
        >>> Entry.objects.values('blog_id')
        [{'blog_id': 1}, ...]
    * When using ``values()`` together with ``distinct()``, be aware that
      ordering can affect the results. See the note in the `distinct()`_
      section, above, for details.
 **New in Django development version:** Previously, it was not possible to pass
 ``blog_id`` to ``values()`` in the above example, only ``blog``.
@ -841,8 +866,7 @@ You can only refer to ``ForeignKey`` relations in the list of fields passed to
 list of fields and the ``depth`` parameter in the same ``select_related()``
 call, since they are conflicting options.
-``extra(select=None, where=None, params=None, tables=None, order_by=None,
+``extra(select=None, where=None, params=None, tables=None, order_by=None, select_params=None)``
 select_params=None)``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Sometimes, the Django query syntax by itself can't easily express a complex
--- a/tests/modeltests/many_to_one/models.py
+++ b/tests/modeltests/many_to_one/models.py
@ -250,9 +250,9 @@ FieldError: Cannot resolve keyword 'reporter_id' into field. Choices are: headli
 >>> Reporter.objects.filter(article__reporter=r).distinct()
 [<Reporter: John Smith>]
-# It's possible to use values() calls across many-to-one relations.
+# It's possible to use values() calls across many-to-one relations. (Note, too, that we clear the ordering here so as not to drag the 'headline' field into the columns being used to determine uniqueness.)
 >>> d = {'reporter__first_name': u'John', 'reporter__last_name': u'Smith'}
->>> list(Article.objects.filter(reporter=r).distinct().values('reporter__first_name', 'reporter__last_name')) == [d]
+>>> list(Article.objects.filter(reporter=r).distinct().order_by().values('reporter__first_name', 'reporter__last_name')) == [d]
 True
 # If you delete a reporter, his articles will be deleted.
--- a/tests/regressiontests/queries/models.py
+++ b/tests/regressiontests/queries/models.py
@ -498,9 +498,15 @@ Bug #3037
 >>> Item.objects.filter(Q(creator__name='a3', name='two')|Q(creator__name='a4', name='four'))
 [<Item: four>]
-Bug #5321
+Bug #5321, #7070
 Ordering columns must be included in the output columns. Note that this means
 results that might otherwise be distinct are not (if there are multiple values
 in the ordering cols), as in this example. This isn't a bug; it's a warning to
 be careful with the selection of ordering columns.
 >>> Note.objects.values('misc').distinct().order_by('note', '-misc')
-[{'misc': u'foo'}, {'misc': u'bar'}]
+[{'misc': u'foo'}, {'misc': u'bar'}, {'misc': u'foo'}]
 Bug #4358
 If you don't pass any fields to values(), relation fields are returned as