diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index 1c22593293..2dba2bdb3c 100644 --- a/django/db/models/sql/query.py +++ b/django/db/models/sql/query.py @@ -52,6 +52,7 @@ class Query(object): self.default_cols = True self.default_ordering = True self.standard_ordering = True + self.ordering_aliases = [] self.start_meta = None # SQL-related attributes @@ -139,6 +140,7 @@ class Query(object): obj.default_cols = self.default_cols obj.default_ordering = self.default_ordering obj.standard_ordering = self.standard_ordering + obj.ordering_aliases = [] obj.start_meta = self.start_meta obj.select = self.select[:] obj.tables = self.tables[:] @@ -215,16 +217,18 @@ class Query(object): self.pre_sql_setup() out_cols = self.get_columns() ordering = self.get_ordering() + # This must come after 'select' and 'ordering' -- see docstring of # get_from_clause() for details. from_, f_params = self.get_from_clause() + where, w_params = self.where.as_sql(qn=self.quote_name_unless_alias) params = list(self.extra_select_params) result = ['SELECT'] if self.distinct: result.append('DISTINCT') - result.append(', '.join(out_cols)) + result.append(', '.join(out_cols + self.ordering_aliases)) result.append('FROM') result.extend(from_) @@ -465,15 +469,13 @@ class Query(object): def get_ordering(self): """ - Returns a tuple representing the SQL elements in the "order by" clause. + Returns list representing the SQL elements in the "order by" clause. + Also sets the ordering_aliases attribute on this instance to a list of + extra aliases needed in the select. Determining the ordering SQL can change the tables we need to include, so this should be run *before* get_from_clause(). """ - # FIXME: It's an SQL-92 requirement that all ordering columns appear as - # output columns in the query (in the select statement) or be ordinals. - # We don't enforce that here, but we should (by adding to the select - # columns), for portability. if self.extra_order_by: ordering = self.extra_order_by elif not self.default_ordering: @@ -485,6 +487,7 @@ class Query(object): distinct = self.distinct select_aliases = self._select_aliases result = [] + ordering_aliases = [] if self.standard_ordering: asc, desc = ORDER_DIR['ASC'] else: @@ -515,13 +518,16 @@ class Query(object): for table, col, order in self.find_ordering_name(field, self.model._meta, default_order=asc): elt = '%s.%s' % (qn(table), qn2(col)) - if not distinct or elt in select_aliases: - result.append('%s %s' % (elt, order)) + if distinct and elt not in select_aliases: + ordering_aliases.append(elt) + result.append('%s %s' % (elt, order)) else: col, order = get_order_dir(field, asc) elt = qn(col) - if not distinct or elt in select_aliases: - result.append('%s %s' % (elt, order)) + if distinct and elt not in select_aliases: + ordering_aliases.append(elt) + result.append('%s %s' % (elt, order)) + self.ordering_aliases = ordering_aliases return result def find_ordering_name(self, name, opts, alias=None, default_order='ASC', @@ -1379,8 +1385,14 @@ class Query(object): if not result_type: return cursor if result_type == SINGLE: + if self.ordering_aliases: + return cursor.fetchone()[:-len(results.ordering_aliases)] return cursor.fetchone() + # The MULTI case. + if self.ordering_aliases: + return order_modified_iter(cursor, len(self.ordering_aliases), + self.connection.features.empty_fetchmany_value) return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)), self.connection.features.empty_fetchmany_value) @@ -1408,6 +1420,17 @@ def empty_iter(): """ yield iter([]).next() +def order_modified_iter(cursor, trim, sentinel): + """ + Yields blocks of rows from a cursor. We use this iterator in the special + case when extra output columns have been added to support ordering + requirements. We must trim those extra columns before anything else can use + the results, since they're only needed to make the SQL valid. + """ + for rows in iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)), + sentinel): + yield [r[:-trim] for r in rows] + def setup_join_cache(sender): """ The information needed to join between model fields is something that is diff --git a/docs/db-api.txt b/docs/db-api.txt index 20dbd38ebc..6a3fe88080 100644 --- a/docs/db-api.txt +++ b/docs/db-api.txt @@ -510,6 +510,10 @@ primary key if there is no ``Meta.ordering`` specified. For example:: ...since the ``Blog`` model has no default ordering specified. +Be cautious when ordering by fields in related models if you are also using +``distinct()``. See the note in the `distinct()`_ section for an explanation +of how related model ordering can change the expected results. + It is permissible to specify a multi-valued field to order the results by (for example, a ``ManyToMany`` field). Normally this won't be a sensible thing to do and it's really an advanced usage feature. However, if you know that your @@ -559,10 +563,28 @@ eliminates duplicate rows from the query results. By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this is rarely a problem, because simple queries such as ``Blog.objects.all()`` -don't introduce the possibility of duplicate result rows. +don't introduce the possibility of duplicate result rows. However, if your +query spans multiple tables, it's possible to get duplicate results when a +``QuerySet`` is evaluated. That's when you'd use ``distinct()``. -However, if your query spans multiple tables, it's possible to get duplicate -results when a ``QuerySet`` is evaluated. That's when you'd use ``distinct()``. +.. note:: + Any fields used in an ``order_by()`` call are included in the SQL + ``SELECT`` columns. This can sometimes lead to unexpected results when + used in conjuntion with ``distinct()``. If you order by fields from a + related model, those fields will be added to the selected columns and they + may make otherwise duplicate rows appear to be distinct. Since the extra + columns don't appear in the returned results (they are only there to + support ordering), it sometimes looks like non-distinct results are being + returned. + + Similarly, if you use a ``values()`` query to restrict the columns + selected, the columns used in any ``order_by()`` (or default model + ordering) will still be involved and may affect uniqueness of the results. + + The moral here is that if you are using ``distinct()`` be careful about + ordering by related models. Similarly, when using ``distinct()`` and + ``values()`` together, be careful when ordering by fields not in the + ``values()`` call. ``values(*fields)`` ~~~~~~~~~~~~~~~~~~~ @@ -627,6 +649,9 @@ A couple of subtleties that are worth mentioning: >>> Entry.objects.values('blog_id') [{'blog_id': 1}, ...] + * When using ``values()`` together with ``distinct()``, be aware that + ordering can affect the results. See the note in the `distinct()`_ + section, above, for details. **New in Django development version:** Previously, it was not possible to pass ``blog_id`` to ``values()`` in the above example, only ``blog``. @@ -841,8 +866,7 @@ You can only refer to ``ForeignKey`` relations in the list of fields passed to list of fields and the ``depth`` parameter in the same ``select_related()`` call, since they are conflicting options. -``extra(select=None, where=None, params=None, tables=None, order_by=None, -select_params=None)`` +``extra(select=None, where=None, params=None, tables=None, order_by=None, select_params=None)`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Sometimes, the Django query syntax by itself can't easily express a complex diff --git a/tests/modeltests/many_to_one/models.py b/tests/modeltests/many_to_one/models.py index d2c8e87fc1..6616f8b55e 100644 --- a/tests/modeltests/many_to_one/models.py +++ b/tests/modeltests/many_to_one/models.py @@ -250,9 +250,9 @@ FieldError: Cannot resolve keyword 'reporter_id' into field. Choices are: headli >>> Reporter.objects.filter(article__reporter=r).distinct() [] -# It's possible to use values() calls across many-to-one relations. +# It's possible to use values() calls across many-to-one relations. (Note, too, that we clear the ordering here so as not to drag the 'headline' field into the columns being used to determine uniqueness.) >>> d = {'reporter__first_name': u'John', 'reporter__last_name': u'Smith'} ->>> list(Article.objects.filter(reporter=r).distinct().values('reporter__first_name', 'reporter__last_name')) == [d] +>>> list(Article.objects.filter(reporter=r).distinct().order_by().values('reporter__first_name', 'reporter__last_name')) == [d] True # If you delete a reporter, his articles will be deleted. diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py index d99e828c3b..f893ac6ad8 100644 --- a/tests/regressiontests/queries/models.py +++ b/tests/regressiontests/queries/models.py @@ -498,9 +498,15 @@ Bug #3037 >>> Item.objects.filter(Q(creator__name='a3', name='two')|Q(creator__name='a4', name='four')) [] -Bug #5321 +Bug #5321, #7070 + +Ordering columns must be included in the output columns. Note that this means +results that might otherwise be distinct are not (if there are multiple values +in the ordering cols), as in this example. This isn't a bug; it's a warning to +be careful with the selection of ordering columns. + >>> Note.objects.values('misc').distinct().order_by('note', '-misc') -[{'misc': u'foo'}, {'misc': u'bar'}] +[{'misc': u'foo'}, {'misc': u'bar'}, {'misc': u'foo'}] Bug #4358 If you don't pass any fields to values(), relation fields are returned as