1
0
mirror of https://github.com/django/django.git synced 2025-07-05 10:19:20 +00:00

queryset-refactor: Changed the way order_by() and distinct() interact.

When using "select distinct" all ordering columns must be part of the output
(select) columns. We were previously just throwing away ordering columns that
weren't included, but there are some cases where they are needed and it's
difficult to add them in manually. So now the default behaviour is to append
any missing columns.

This can affect the output of distinct() if complicated order_by() constructs
are used, so the documentation has been updated with an explanation of what's
going on there.

Fixed #7070.


git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7455 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Malcolm Tredinnick 2008-04-24 16:07:07 +00:00
parent dd0c5855e9
commit 8a4e1de8b0
4 changed files with 72 additions and 19 deletions

View File

@ -52,6 +52,7 @@ class Query(object):
self.default_cols = True self.default_cols = True
self.default_ordering = True self.default_ordering = True
self.standard_ordering = True self.standard_ordering = True
self.ordering_aliases = []
self.start_meta = None self.start_meta = None
# SQL-related attributes # SQL-related attributes
@ -139,6 +140,7 @@ class Query(object):
obj.default_cols = self.default_cols obj.default_cols = self.default_cols
obj.default_ordering = self.default_ordering obj.default_ordering = self.default_ordering
obj.standard_ordering = self.standard_ordering obj.standard_ordering = self.standard_ordering
obj.ordering_aliases = []
obj.start_meta = self.start_meta obj.start_meta = self.start_meta
obj.select = self.select[:] obj.select = self.select[:]
obj.tables = self.tables[:] obj.tables = self.tables[:]
@ -215,16 +217,18 @@ class Query(object):
self.pre_sql_setup() self.pre_sql_setup()
out_cols = self.get_columns() out_cols = self.get_columns()
ordering = self.get_ordering() ordering = self.get_ordering()
# This must come after 'select' and 'ordering' -- see docstring of # This must come after 'select' and 'ordering' -- see docstring of
# get_from_clause() for details. # get_from_clause() for details.
from_, f_params = self.get_from_clause() from_, f_params = self.get_from_clause()
where, w_params = self.where.as_sql(qn=self.quote_name_unless_alias) where, w_params = self.where.as_sql(qn=self.quote_name_unless_alias)
params = list(self.extra_select_params) params = list(self.extra_select_params)
result = ['SELECT'] result = ['SELECT']
if self.distinct: if self.distinct:
result.append('DISTINCT') result.append('DISTINCT')
result.append(', '.join(out_cols)) result.append(', '.join(out_cols + self.ordering_aliases))
result.append('FROM') result.append('FROM')
result.extend(from_) result.extend(from_)
@ -465,15 +469,13 @@ class Query(object):
def get_ordering(self): def get_ordering(self):
""" """
Returns a tuple representing the SQL elements in the "order by" clause. Returns list representing the SQL elements in the "order by" clause.
Also sets the ordering_aliases attribute on this instance to a list of
extra aliases needed in the select.
Determining the ordering SQL can change the tables we need to include, Determining the ordering SQL can change the tables we need to include,
so this should be run *before* get_from_clause(). so this should be run *before* get_from_clause().
""" """
# FIXME: It's an SQL-92 requirement that all ordering columns appear as
# output columns in the query (in the select statement) or be ordinals.
# We don't enforce that here, but we should (by adding to the select
# columns), for portability.
if self.extra_order_by: if self.extra_order_by:
ordering = self.extra_order_by ordering = self.extra_order_by
elif not self.default_ordering: elif not self.default_ordering:
@ -485,6 +487,7 @@ class Query(object):
distinct = self.distinct distinct = self.distinct
select_aliases = self._select_aliases select_aliases = self._select_aliases
result = [] result = []
ordering_aliases = []
if self.standard_ordering: if self.standard_ordering:
asc, desc = ORDER_DIR['ASC'] asc, desc = ORDER_DIR['ASC']
else: else:
@ -515,13 +518,16 @@ class Query(object):
for table, col, order in self.find_ordering_name(field, for table, col, order in self.find_ordering_name(field,
self.model._meta, default_order=asc): self.model._meta, default_order=asc):
elt = '%s.%s' % (qn(table), qn2(col)) elt = '%s.%s' % (qn(table), qn2(col))
if not distinct or elt in select_aliases: if distinct and elt not in select_aliases:
result.append('%s %s' % (elt, order)) ordering_aliases.append(elt)
result.append('%s %s' % (elt, order))
else: else:
col, order = get_order_dir(field, asc) col, order = get_order_dir(field, asc)
elt = qn(col) elt = qn(col)
if not distinct or elt in select_aliases: if distinct and elt not in select_aliases:
result.append('%s %s' % (elt, order)) ordering_aliases.append(elt)
result.append('%s %s' % (elt, order))
self.ordering_aliases = ordering_aliases
return result return result
def find_ordering_name(self, name, opts, alias=None, default_order='ASC', def find_ordering_name(self, name, opts, alias=None, default_order='ASC',
@ -1379,8 +1385,14 @@ class Query(object):
if not result_type: if not result_type:
return cursor return cursor
if result_type == SINGLE: if result_type == SINGLE:
if self.ordering_aliases:
return cursor.fetchone()[:-len(results.ordering_aliases)]
return cursor.fetchone() return cursor.fetchone()
# The MULTI case. # The MULTI case.
if self.ordering_aliases:
return order_modified_iter(cursor, len(self.ordering_aliases),
self.connection.features.empty_fetchmany_value)
return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)), return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)),
self.connection.features.empty_fetchmany_value) self.connection.features.empty_fetchmany_value)
@ -1408,6 +1420,17 @@ def empty_iter():
""" """
yield iter([]).next() yield iter([]).next()
def order_modified_iter(cursor, trim, sentinel):
"""
Yields blocks of rows from a cursor. We use this iterator in the special
case when extra output columns have been added to support ordering
requirements. We must trim those extra columns before anything else can use
the results, since they're only needed to make the SQL valid.
"""
for rows in iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)),
sentinel):
yield [r[:-trim] for r in rows]
def setup_join_cache(sender): def setup_join_cache(sender):
""" """
The information needed to join between model fields is something that is The information needed to join between model fields is something that is

View File

@ -510,6 +510,10 @@ primary key if there is no ``Meta.ordering`` specified. For example::
...since the ``Blog`` model has no default ordering specified. ...since the ``Blog`` model has no default ordering specified.
Be cautious when ordering by fields in related models if you are also using
``distinct()``. See the note in the `distinct()`_ section for an explanation
of how related model ordering can change the expected results.
It is permissible to specify a multi-valued field to order the results by (for It is permissible to specify a multi-valued field to order the results by (for
example, a ``ManyToMany`` field). Normally this won't be a sensible thing to example, a ``ManyToMany`` field). Normally this won't be a sensible thing to
do and it's really an advanced usage feature. However, if you know that your do and it's really an advanced usage feature. However, if you know that your
@ -559,10 +563,28 @@ eliminates duplicate rows from the query results.
By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
is rarely a problem, because simple queries such as ``Blog.objects.all()`` is rarely a problem, because simple queries such as ``Blog.objects.all()``
don't introduce the possibility of duplicate result rows. don't introduce the possibility of duplicate result rows. However, if your
query spans multiple tables, it's possible to get duplicate results when a
``QuerySet`` is evaluated. That's when you'd use ``distinct()``.
However, if your query spans multiple tables, it's possible to get duplicate .. note::
results when a ``QuerySet`` is evaluated. That's when you'd use ``distinct()``. Any fields used in an ``order_by()`` call are included in the SQL
``SELECT`` columns. This can sometimes lead to unexpected results when
used in conjuntion with ``distinct()``. If you order by fields from a
related model, those fields will be added to the selected columns and they
may make otherwise duplicate rows appear to be distinct. Since the extra
columns don't appear in the returned results (they are only there to
support ordering), it sometimes looks like non-distinct results are being
returned.
Similarly, if you use a ``values()`` query to restrict the columns
selected, the columns used in any ``order_by()`` (or default model
ordering) will still be involved and may affect uniqueness of the results.
The moral here is that if you are using ``distinct()`` be careful about
ordering by related models. Similarly, when using ``distinct()`` and
``values()`` together, be careful when ordering by fields not in the
``values()`` call.
``values(*fields)`` ``values(*fields)``
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
@ -627,6 +649,9 @@ A couple of subtleties that are worth mentioning:
>>> Entry.objects.values('blog_id') >>> Entry.objects.values('blog_id')
[{'blog_id': 1}, ...] [{'blog_id': 1}, ...]
* When using ``values()`` together with ``distinct()``, be aware that
ordering can affect the results. See the note in the `distinct()`_
section, above, for details.
**New in Django development version:** Previously, it was not possible to pass **New in Django development version:** Previously, it was not possible to pass
``blog_id`` to ``values()`` in the above example, only ``blog``. ``blog_id`` to ``values()`` in the above example, only ``blog``.
@ -841,8 +866,7 @@ You can only refer to ``ForeignKey`` relations in the list of fields passed to
list of fields and the ``depth`` parameter in the same ``select_related()`` list of fields and the ``depth`` parameter in the same ``select_related()``
call, since they are conflicting options. call, since they are conflicting options.
``extra(select=None, where=None, params=None, tables=None, order_by=None, ``extra(select=None, where=None, params=None, tables=None, order_by=None, select_params=None)``
select_params=None)``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Sometimes, the Django query syntax by itself can't easily express a complex Sometimes, the Django query syntax by itself can't easily express a complex

View File

@ -250,9 +250,9 @@ FieldError: Cannot resolve keyword 'reporter_id' into field. Choices are: headli
>>> Reporter.objects.filter(article__reporter=r).distinct() >>> Reporter.objects.filter(article__reporter=r).distinct()
[<Reporter: John Smith>] [<Reporter: John Smith>]
# It's possible to use values() calls across many-to-one relations. # It's possible to use values() calls across many-to-one relations. (Note, too, that we clear the ordering here so as not to drag the 'headline' field into the columns being used to determine uniqueness.)
>>> d = {'reporter__first_name': u'John', 'reporter__last_name': u'Smith'} >>> d = {'reporter__first_name': u'John', 'reporter__last_name': u'Smith'}
>>> list(Article.objects.filter(reporter=r).distinct().values('reporter__first_name', 'reporter__last_name')) == [d] >>> list(Article.objects.filter(reporter=r).distinct().order_by().values('reporter__first_name', 'reporter__last_name')) == [d]
True True
# If you delete a reporter, his articles will be deleted. # If you delete a reporter, his articles will be deleted.

View File

@ -498,9 +498,15 @@ Bug #3037
>>> Item.objects.filter(Q(creator__name='a3', name='two')|Q(creator__name='a4', name='four')) >>> Item.objects.filter(Q(creator__name='a3', name='two')|Q(creator__name='a4', name='four'))
[<Item: four>] [<Item: four>]
Bug #5321 Bug #5321, #7070
Ordering columns must be included in the output columns. Note that this means
results that might otherwise be distinct are not (if there are multiple values
in the ordering cols), as in this example. This isn't a bug; it's a warning to
be careful with the selection of ordering columns.
>>> Note.objects.values('misc').distinct().order_by('note', '-misc') >>> Note.objects.values('misc').distinct().order_by('note', '-misc')
[{'misc': u'foo'}, {'misc': u'bar'}] [{'misc': u'foo'}, {'misc': u'bar'}, {'misc': u'foo'}]
Bug #4358 Bug #4358
If you don't pass any fields to values(), relation fields are returned as If you don't pass any fields to values(), relation fields are returned as