mirror of
				https://github.com/django/django.git
				synced 2025-10-24 22:26:08 +00:00 
			
		
		
		
	Added support for modifying the effect of `DISTINCT` clauses so they
				
					
				
			only consider some fields (PostgreSQL only). For this, the ``distinct()`` QuerySet method now accepts an optional list of model fields names and generates ``DISTINCT ON`` clauses on these cases. Thanks Jeffrey Gelens and Anssi Kääriäinen for their work. Fixes #6422. git-svn-id: http://code.djangoproject.com/svn/django/trunk@17244 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
		
							
								
								
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -203,6 +203,7 @@ answer newbie questions, and generally made Django that much better: | |||||||
|     Marc Garcia <marc.garcia@accopensys.com> |     Marc Garcia <marc.garcia@accopensys.com> | ||||||
|     Andy Gayton <andy-django@thecablelounge.com> |     Andy Gayton <andy-django@thecablelounge.com> | ||||||
|     geber@datacollect.com |     geber@datacollect.com | ||||||
|  |     Jeffrey Gelens <jeffrey@gelens.org> | ||||||
|     Baishampayan Ghose |     Baishampayan Ghose | ||||||
|     Joshua Ginsberg <jag@flowtheory.net> |     Joshua Ginsberg <jag@flowtheory.net> | ||||||
|     Dimitris Glezos <dimitris@glezos.com> |     Dimitris Glezos <dimitris@glezos.com> | ||||||
| @@ -269,6 +270,7 @@ answer newbie questions, and generally made Django that much better: | |||||||
|     jpellerin@gmail.com |     jpellerin@gmail.com | ||||||
|     junzhang.jn@gmail.com |     junzhang.jn@gmail.com | ||||||
|     Xia Kai <http://blog.xiaket.org/> |     Xia Kai <http://blog.xiaket.org/> | ||||||
|  |     Anssi Kääriäinen | ||||||
|     Antti Kaihola <http://djangopeople.net/akaihola/> |     Antti Kaihola <http://djangopeople.net/akaihola/> | ||||||
|     Peter van Kampen |     Peter van Kampen | ||||||
|     Bahadır Kandemir <bahadir@pardus.org.tr> |     Bahadır Kandemir <bahadir@pardus.org.tr> | ||||||
|   | |||||||
| @@ -406,6 +406,9 @@ class BaseDatabaseFeatures(object): | |||||||
|     supports_stddev = None |     supports_stddev = None | ||||||
|     can_introspect_foreign_keys = None |     can_introspect_foreign_keys = None | ||||||
|  |  | ||||||
|  |     # Support for the DISTINCT ON clause | ||||||
|  |     can_distinct_on_fields = False | ||||||
|  |  | ||||||
|     def __init__(self, connection): |     def __init__(self, connection): | ||||||
|         self.connection = connection |         self.connection = connection | ||||||
|  |  | ||||||
| @@ -559,6 +562,17 @@ class BaseDatabaseOperations(object): | |||||||
|         """ |         """ | ||||||
|         raise NotImplementedError('Full-text search is not implemented for this database backend') |         raise NotImplementedError('Full-text search is not implemented for this database backend') | ||||||
|  |  | ||||||
|  |     def distinct_sql(self, fields): | ||||||
|  |         """ | ||||||
|  |         Returns an SQL DISTINCT clause which removes duplicate rows from the | ||||||
|  |         result set. If any fields are given, only the given fields are being | ||||||
|  |         checked for duplicates. | ||||||
|  |         """ | ||||||
|  |         if fields: | ||||||
|  |             raise NotImplementedError('DISTINCT ON fields is not supported by this database backend') | ||||||
|  |         else: | ||||||
|  |             return 'DISTINCT' | ||||||
|  |  | ||||||
|     def last_executed_query(self, cursor, sql, params): |     def last_executed_query(self, cursor, sql, params): | ||||||
|         """ |         """ | ||||||
|         Returns a string of the query last executed by the given cursor, with |         Returns a string of the query last executed by the given cursor, with | ||||||
|   | |||||||
| @@ -82,6 +82,7 @@ class DatabaseFeatures(BaseDatabaseFeatures): | |||||||
|     has_select_for_update_nowait = True |     has_select_for_update_nowait = True | ||||||
|     has_bulk_insert = True |     has_bulk_insert = True | ||||||
|     supports_tablespaces = True |     supports_tablespaces = True | ||||||
|  |     can_distinct_on_fields = True | ||||||
|  |  | ||||||
| class DatabaseWrapper(BaseDatabaseWrapper): | class DatabaseWrapper(BaseDatabaseWrapper): | ||||||
|     vendor = 'postgresql' |     vendor = 'postgresql' | ||||||
|   | |||||||
| @@ -179,6 +179,12 @@ class DatabaseOperations(BaseDatabaseOperations): | |||||||
|  |  | ||||||
|         return 63 |         return 63 | ||||||
|  |  | ||||||
|  |     def distinct_sql(self, fields): | ||||||
|  |         if fields: | ||||||
|  |             return 'DISTINCT ON (%s)' % ', '.join(fields) | ||||||
|  |         else: | ||||||
|  |             return 'DISTINCT' | ||||||
|  |  | ||||||
|     def last_executed_query(self, cursor, sql, params): |     def last_executed_query(self, cursor, sql, params): | ||||||
|         # http://initd.org/psycopg/docs/cursor.html#cursor.query |         # http://initd.org/psycopg/docs/cursor.html#cursor.query | ||||||
|         # The query attribute is a Psycopg extension to the DB API 2.0. |         # The query attribute is a Psycopg extension to the DB API 2.0. | ||||||
|   | |||||||
| @@ -323,6 +323,8 @@ class QuerySet(object): | |||||||
|         If args is present the expression is passed as a kwarg using |         If args is present the expression is passed as a kwarg using | ||||||
|         the Aggregate object's default alias. |         the Aggregate object's default alias. | ||||||
|         """ |         """ | ||||||
|  |         if self.query.distinct_fields: | ||||||
|  |             raise NotImplementedError("aggregate() + distinct(fields) not implemented.") | ||||||
|         for arg in args: |         for arg in args: | ||||||
|             kwargs[arg.default_alias] = arg |             kwargs[arg.default_alias] = arg | ||||||
|  |  | ||||||
| @@ -751,12 +753,14 @@ class QuerySet(object): | |||||||
|         obj.query.add_ordering(*field_names) |         obj.query.add_ordering(*field_names) | ||||||
|         return obj |         return obj | ||||||
|  |  | ||||||
|     def distinct(self, true_or_false=True): |     def distinct(self, *field_names): | ||||||
|         """ |         """ | ||||||
|         Returns a new QuerySet instance that will select only distinct results. |         Returns a new QuerySet instance that will select only distinct results. | ||||||
|         """ |         """ | ||||||
|  |         assert self.query.can_filter(), \ | ||||||
|  |                 "Cannot create distinct fields once a slice has been taken." | ||||||
|         obj = self._clone() |         obj = self._clone() | ||||||
|         obj.query.distinct = true_or_false |         obj.query.add_distinct_fields(*field_names) | ||||||
|         return obj |         return obj | ||||||
|  |  | ||||||
|     def extra(self, select=None, where=None, params=None, tables=None, |     def extra(self, select=None, where=None, params=None, tables=None, | ||||||
| @@ -1179,7 +1183,7 @@ class EmptyQuerySet(QuerySet): | |||||||
|         """ |         """ | ||||||
|         return self |         return self | ||||||
|  |  | ||||||
|     def distinct(self, true_or_false=True): |     def distinct(self, fields=None): | ||||||
|         """ |         """ | ||||||
|         Always returns EmptyQuerySet. |         Always returns EmptyQuerySet. | ||||||
|         """ |         """ | ||||||
|   | |||||||
| @@ -23,6 +23,8 @@ class SQLCompiler(object): | |||||||
|         Does any necessary class setup immediately prior to producing SQL. This |         Does any necessary class setup immediately prior to producing SQL. This | ||||||
|         is for things that can't necessarily be done in __init__ because we |         is for things that can't necessarily be done in __init__ because we | ||||||
|         might not have all the pieces in place at that time. |         might not have all the pieces in place at that time. | ||||||
|  |         # TODO: after the query has been executed, the altered state should be | ||||||
|  |         # cleaned. We are not using a clone() of the query here. | ||||||
|         """ |         """ | ||||||
|         if not self.query.tables: |         if not self.query.tables: | ||||||
|             self.query.join((None, self.query.model._meta.db_table, None, None)) |             self.query.join((None, self.query.model._meta.db_table, None, None)) | ||||||
| @@ -60,11 +62,19 @@ class SQLCompiler(object): | |||||||
|             return '', () |             return '', () | ||||||
|  |  | ||||||
|         self.pre_sql_setup() |         self.pre_sql_setup() | ||||||
|  |         # After executing the query, we must get rid of any joins the query | ||||||
|  |         # setup created. So, take note of alias counts before the query ran. | ||||||
|  |         # However we do not want to get rid of stuff done in pre_sql_setup(), | ||||||
|  |         # as the pre_sql_setup will modify query state in a way that forbids | ||||||
|  |         # another run of it. | ||||||
|  |         self.refcounts_before = self.query.alias_refcount.copy() | ||||||
|         out_cols = self.get_columns(with_col_aliases) |         out_cols = self.get_columns(with_col_aliases) | ||||||
|         ordering, ordering_group_by = self.get_ordering() |         ordering, ordering_group_by = self.get_ordering() | ||||||
|  |  | ||||||
|         # This must come after 'select' and 'ordering' -- see docstring of |         distinct_fields = self.get_distinct() | ||||||
|         # get_from_clause() for details. |  | ||||||
|  |         # This must come after 'select', 'ordering' and 'distinct' -- see | ||||||
|  |         # docstring of get_from_clause() for details. | ||||||
|         from_, f_params = self.get_from_clause() |         from_, f_params = self.get_from_clause() | ||||||
|  |  | ||||||
|         qn = self.quote_name_unless_alias |         qn = self.quote_name_unless_alias | ||||||
| @@ -76,8 +86,10 @@ class SQLCompiler(object): | |||||||
|             params.extend(val[1]) |             params.extend(val[1]) | ||||||
|  |  | ||||||
|         result = ['SELECT'] |         result = ['SELECT'] | ||||||
|  |  | ||||||
|         if self.query.distinct: |         if self.query.distinct: | ||||||
|             result.append('DISTINCT') |             result.append(self.connection.ops.distinct_sql(distinct_fields)) | ||||||
|  |  | ||||||
|         result.append(', '.join(out_cols + self.query.ordering_aliases)) |         result.append(', '.join(out_cols + self.query.ordering_aliases)) | ||||||
|  |  | ||||||
|         result.append('FROM') |         result.append('FROM') | ||||||
| @@ -90,6 +102,9 @@ class SQLCompiler(object): | |||||||
|  |  | ||||||
|         grouping, gb_params = self.get_grouping() |         grouping, gb_params = self.get_grouping() | ||||||
|         if grouping: |         if grouping: | ||||||
|  |             if distinct_fields: | ||||||
|  |                 raise NotImplementedError( | ||||||
|  |                     "annotate() + distinct(fields) not implemented.") | ||||||
|             if ordering: |             if ordering: | ||||||
|                 # If the backend can't group by PK (i.e., any database |                 # If the backend can't group by PK (i.e., any database | ||||||
|                 # other than MySQL), then any fields mentioned in the |                 # other than MySQL), then any fields mentioned in the | ||||||
| @@ -129,6 +144,9 @@ class SQLCompiler(object): | |||||||
|                 raise DatabaseError('NOWAIT is not supported on this database backend.') |                 raise DatabaseError('NOWAIT is not supported on this database backend.') | ||||||
|             result.append(self.connection.ops.for_update_sql(nowait=nowait)) |             result.append(self.connection.ops.for_update_sql(nowait=nowait)) | ||||||
|  |  | ||||||
|  |         # Finally do cleanup - get rid of the joins we created above. | ||||||
|  |         self.query.reset_refcounts(self.refcounts_before) | ||||||
|  |  | ||||||
|         return ' '.join(result), tuple(params) |         return ' '.join(result), tuple(params) | ||||||
|  |  | ||||||
|     def as_nested_sql(self): |     def as_nested_sql(self): | ||||||
| @@ -292,6 +310,26 @@ class SQLCompiler(object): | |||||||
|                     col_aliases.add(field.column) |                     col_aliases.add(field.column) | ||||||
|         return result, aliases |         return result, aliases | ||||||
|  |  | ||||||
|  |     def get_distinct(self): | ||||||
|  |         """ | ||||||
|  |         Returns a quoted list of fields to use in DISTINCT ON part of the query. | ||||||
|  |  | ||||||
|  |         Note that this method can alter the tables in the query, and thus it | ||||||
|  |         must be called before get_from_clause(). | ||||||
|  |         """ | ||||||
|  |         qn = self.quote_name_unless_alias | ||||||
|  |         qn2 = self.connection.ops.quote_name | ||||||
|  |         result = [] | ||||||
|  |         opts = self.query.model._meta | ||||||
|  |  | ||||||
|  |         for name in self.query.distinct_fields: | ||||||
|  |             parts = name.split(LOOKUP_SEP) | ||||||
|  |             field, col, alias, _, _ = self._setup_joins(parts, opts, None) | ||||||
|  |             col, alias = self._final_join_removal(col, alias) | ||||||
|  |             result.append("%s.%s" % (qn(alias), qn2(col))) | ||||||
|  |         return result | ||||||
|  |  | ||||||
|  |  | ||||||
|     def get_ordering(self): |     def get_ordering(self): | ||||||
|         """ |         """ | ||||||
|         Returns a tuple containing a list representing the SQL elements in the |         Returns a tuple containing a list representing the SQL elements in the | ||||||
| @@ -384,21 +422,7 @@ class SQLCompiler(object): | |||||||
|         """ |         """ | ||||||
|         name, order = get_order_dir(name, default_order) |         name, order = get_order_dir(name, default_order) | ||||||
|         pieces = name.split(LOOKUP_SEP) |         pieces = name.split(LOOKUP_SEP) | ||||||
|         if not alias: |         field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias) | ||||||
|             alias = self.query.get_initial_alias() |  | ||||||
|         field, target, opts, joins, last, extra = self.query.setup_joins(pieces, |  | ||||||
|                 opts, alias, False) |  | ||||||
|         alias = joins[-1] |  | ||||||
|         col = target.column |  | ||||||
|         if not field.rel: |  | ||||||
|             # To avoid inadvertent trimming of a necessary alias, use the |  | ||||||
|             # refcount to show that we are referencing a non-relation field on |  | ||||||
|             # the model. |  | ||||||
|             self.query.ref_alias(alias) |  | ||||||
|  |  | ||||||
|         # Must use left outer joins for nullable fields and their relations. |  | ||||||
|         self.query.promote_alias_chain(joins, |  | ||||||
|             self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER) |  | ||||||
|  |  | ||||||
|         # If we get to this point and the field is a relation to another model, |         # If we get to this point and the field is a relation to another model, | ||||||
|         # append the default ordering for that model. |         # append the default ordering for that model. | ||||||
| @@ -416,11 +440,47 @@ class SQLCompiler(object): | |||||||
|                 results.extend(self.find_ordering_name(item, opts, alias, |                 results.extend(self.find_ordering_name(item, opts, alias, | ||||||
|                         order, already_seen)) |                         order, already_seen)) | ||||||
|             return results |             return results | ||||||
|  |         col, alias = self._final_join_removal(col, alias) | ||||||
|  |         return [(alias, col, order)] | ||||||
|  |  | ||||||
|  |     def _setup_joins(self, pieces, opts, alias): | ||||||
|  |         """ | ||||||
|  |         A helper method for get_ordering and get_distinct. This method will | ||||||
|  |         call query.setup_joins, handle refcounts and then promote the joins. | ||||||
|  |  | ||||||
|  |         Note that get_ordering and get_distinct must produce same target | ||||||
|  |         columns on same input, as the prefixes of get_ordering and get_distinct | ||||||
|  |         must match. Executing SQL where this is not true is an error. | ||||||
|  |         """ | ||||||
|  |         if not alias: | ||||||
|  |             alias = self.query.get_initial_alias() | ||||||
|  |         field, target, opts, joins, _, _ = self.query.setup_joins(pieces, | ||||||
|  |                 opts, alias, False) | ||||||
|  |         alias = joins[-1] | ||||||
|  |         col = target.column | ||||||
|  |         if not field.rel: | ||||||
|  |             # To avoid inadvertent trimming of a necessary alias, use the | ||||||
|  |             # refcount to show that we are referencing a non-relation field on | ||||||
|  |             # the model. | ||||||
|  |             self.query.ref_alias(alias) | ||||||
|  |  | ||||||
|  |         # Must use left outer joins for nullable fields and their relations. | ||||||
|  |         # Ordering or distinct must not affect the returned set, and INNER | ||||||
|  |         # JOINS for nullable fields could do this. | ||||||
|  |         self.query.promote_alias_chain(joins, | ||||||
|  |             self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER) | ||||||
|  |         return field, col, alias, joins, opts | ||||||
|  |  | ||||||
|  |     def _final_join_removal(self, col, alias): | ||||||
|  |         """ | ||||||
|  |         A helper method for get_distinct and get_ordering. This method will | ||||||
|  |         trim extra not-needed joins from the tail of the join chain. | ||||||
|  |  | ||||||
|  |         This is very similar to what is done in trim_joins, but we will | ||||||
|  |         trim LEFT JOINS here. It would be a good idea to consolidate this | ||||||
|  |         method and query.trim_joins(). | ||||||
|  |         """ | ||||||
|         if alias: |         if alias: | ||||||
|             # We have to do the same "final join" optimisation as in |  | ||||||
|             # add_filter, since the final column might not otherwise be part of |  | ||||||
|             # the select set (so we can't order on it). |  | ||||||
|             while 1: |             while 1: | ||||||
|                 join = self.query.alias_map[alias] |                 join = self.query.alias_map[alias] | ||||||
|                 if col != join[RHS_JOIN_COL]: |                 if col != join[RHS_JOIN_COL]: | ||||||
| @@ -428,7 +488,7 @@ class SQLCompiler(object): | |||||||
|                 self.query.unref_alias(alias) |                 self.query.unref_alias(alias) | ||||||
|                 alias = join[LHS_ALIAS] |                 alias = join[LHS_ALIAS] | ||||||
|                 col = join[LHS_JOIN_COL] |                 col = join[LHS_JOIN_COL] | ||||||
|         return [(alias, col, order)] |         return col, alias | ||||||
|  |  | ||||||
|     def get_from_clause(self): |     def get_from_clause(self): | ||||||
|         """ |         """ | ||||||
| @@ -438,8 +498,8 @@ class SQLCompiler(object): | |||||||
|         from-clause via a "select". |         from-clause via a "select". | ||||||
|  |  | ||||||
|         This should only be called after any SQL construction methods that |         This should only be called after any SQL construction methods that | ||||||
|         might change the tables we need. This means the select columns and |         might change the tables we need. This means the select columns, | ||||||
|         ordering must be done first. |         ordering and distinct must be done first. | ||||||
|         """ |         """ | ||||||
|         result = [] |         result = [] | ||||||
|         qn = self.quote_name_unless_alias |         qn = self.quote_name_unless_alias | ||||||
| @@ -984,6 +1044,7 @@ class SQLAggregateCompiler(SQLCompiler): | |||||||
|         """ |         """ | ||||||
|         if qn is None: |         if qn is None: | ||||||
|             qn = self.quote_name_unless_alias |             qn = self.quote_name_unless_alias | ||||||
|  |  | ||||||
|         sql = ('SELECT %s FROM (%s) subquery' % ( |         sql = ('SELECT %s FROM (%s) subquery' % ( | ||||||
|             ', '.join([ |             ', '.join([ | ||||||
|                 aggregate.as_sql(qn, self.connection) |                 aggregate.as_sql(qn, self.connection) | ||||||
|   | |||||||
| @@ -127,6 +127,7 @@ class Query(object): | |||||||
|         self.order_by = [] |         self.order_by = [] | ||||||
|         self.low_mark, self.high_mark = 0, None  # Used for offset/limit |         self.low_mark, self.high_mark = 0, None  # Used for offset/limit | ||||||
|         self.distinct = False |         self.distinct = False | ||||||
|  |         self.distinct_fields = [] | ||||||
|         self.select_for_update = False |         self.select_for_update = False | ||||||
|         self.select_for_update_nowait = False |         self.select_for_update_nowait = False | ||||||
|         self.select_related = False |         self.select_related = False | ||||||
| @@ -265,6 +266,7 @@ class Query(object): | |||||||
|         obj.order_by = self.order_by[:] |         obj.order_by = self.order_by[:] | ||||||
|         obj.low_mark, obj.high_mark = self.low_mark, self.high_mark |         obj.low_mark, obj.high_mark = self.low_mark, self.high_mark | ||||||
|         obj.distinct = self.distinct |         obj.distinct = self.distinct | ||||||
|  |         obj.distinct_fields = self.distinct_fields[:] | ||||||
|         obj.select_for_update = self.select_for_update |         obj.select_for_update = self.select_for_update | ||||||
|         obj.select_for_update_nowait = self.select_for_update_nowait |         obj.select_for_update_nowait = self.select_for_update_nowait | ||||||
|         obj.select_related = self.select_related |         obj.select_related = self.select_related | ||||||
| @@ -298,6 +300,7 @@ class Query(object): | |||||||
|         else: |         else: | ||||||
|             obj.used_aliases = set() |             obj.used_aliases = set() | ||||||
|         obj.filter_is_sticky = False |         obj.filter_is_sticky = False | ||||||
|  |  | ||||||
|         obj.__dict__.update(kwargs) |         obj.__dict__.update(kwargs) | ||||||
|         if hasattr(obj, '_setup_query'): |         if hasattr(obj, '_setup_query'): | ||||||
|             obj._setup_query() |             obj._setup_query() | ||||||
| @@ -393,7 +396,7 @@ class Query(object): | |||||||
|         Performs a COUNT() query using the current filter constraints. |         Performs a COUNT() query using the current filter constraints. | ||||||
|         """ |         """ | ||||||
|         obj = self.clone() |         obj = self.clone() | ||||||
|         if len(self.select) > 1 or self.aggregate_select: |         if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields): | ||||||
|             # If a select clause exists, then the query has already started to |             # If a select clause exists, then the query has already started to | ||||||
|             # specify the columns that are to be returned. |             # specify the columns that are to be returned. | ||||||
|             # In this case, we need to use a subquery to evaluate the count. |             # In this case, we need to use a subquery to evaluate the count. | ||||||
| @@ -452,6 +455,8 @@ class Query(object): | |||||||
|                 "Cannot combine queries once a slice has been taken." |                 "Cannot combine queries once a slice has been taken." | ||||||
|         assert self.distinct == rhs.distinct, \ |         assert self.distinct == rhs.distinct, \ | ||||||
|             "Cannot combine a unique query with a non-unique query." |             "Cannot combine a unique query with a non-unique query." | ||||||
|  |         assert self.distinct_fields == rhs.distinct_fields, \ | ||||||
|  |             "Cannot combine queries with different distinct fields." | ||||||
|  |  | ||||||
|         self.remove_inherited_models() |         self.remove_inherited_models() | ||||||
|         # Work out how to relabel the rhs aliases, if necessary. |         # Work out how to relabel the rhs aliases, if necessary. | ||||||
| @@ -674,9 +679,9 @@ class Query(object): | |||||||
|         """ Increases the reference count for this alias. """ |         """ Increases the reference count for this alias. """ | ||||||
|         self.alias_refcount[alias] += 1 |         self.alias_refcount[alias] += 1 | ||||||
|  |  | ||||||
|     def unref_alias(self, alias): |     def unref_alias(self, alias, amount=1): | ||||||
|         """ Decreases the reference count for this alias. """ |         """ Decreases the reference count for this alias. """ | ||||||
|         self.alias_refcount[alias] -= 1 |         self.alias_refcount[alias] -= amount | ||||||
|  |  | ||||||
|     def promote_alias(self, alias, unconditional=False): |     def promote_alias(self, alias, unconditional=False): | ||||||
|         """ |         """ | ||||||
| @@ -705,6 +710,15 @@ class Query(object): | |||||||
|             if self.promote_alias(alias, must_promote): |             if self.promote_alias(alias, must_promote): | ||||||
|                 must_promote = True |                 must_promote = True | ||||||
|  |  | ||||||
|  |     def reset_refcounts(self, to_counts): | ||||||
|  |         """ | ||||||
|  |         This method will reset reference counts for aliases so that they match | ||||||
|  |         the value passed in :param to_counts:. | ||||||
|  |         """ | ||||||
|  |         for alias, cur_refcount in self.alias_refcount.copy().items(): | ||||||
|  |             unref_amount = cur_refcount - to_counts.get(alias, 0) | ||||||
|  |             self.unref_alias(alias, unref_amount) | ||||||
|  |  | ||||||
|     def promote_unused_aliases(self, initial_refcounts, used_aliases): |     def promote_unused_aliases(self, initial_refcounts, used_aliases): | ||||||
|         """ |         """ | ||||||
|         Given a "before" copy of the alias_refcounts dictionary (as |         Given a "before" copy of the alias_refcounts dictionary (as | ||||||
| @@ -832,7 +846,8 @@ class Query(object): | |||||||
|     def count_active_tables(self): |     def count_active_tables(self): | ||||||
|         """ |         """ | ||||||
|         Returns the number of tables in this query with a non-zero reference |         Returns the number of tables in this query with a non-zero reference | ||||||
|         count. |         count. Note that after execution, the reference counts are zeroed, so | ||||||
|  |         tables added in compiler will not be seen by this method. | ||||||
|         """ |         """ | ||||||
|         return len([1 for count in self.alias_refcount.itervalues() if count]) |         return len([1 for count in self.alias_refcount.itervalues() if count]) | ||||||
|  |  | ||||||
| @@ -1596,6 +1611,13 @@ class Query(object): | |||||||
|         self.select = [] |         self.select = [] | ||||||
|         self.select_fields = [] |         self.select_fields = [] | ||||||
|  |  | ||||||
|  |     def add_distinct_fields(self, *field_names): | ||||||
|  |         """ | ||||||
|  |         Adds and resolves the given fields to the query's "distinct on" clause. | ||||||
|  |         """ | ||||||
|  |         self.distinct_fields = field_names | ||||||
|  |         self.distinct = True | ||||||
|  |  | ||||||
|     def add_fields(self, field_names, allow_m2m=True): |     def add_fields(self, field_names, allow_m2m=True): | ||||||
|         """ |         """ | ||||||
|         Adds the given (model) fields to the select set. The field names are |         Adds the given (model) fields to the select set. The field names are | ||||||
|   | |||||||
| @@ -345,7 +345,7 @@ remain undefined afterward). | |||||||
| distinct | distinct | ||||||
| ~~~~~~~~ | ~~~~~~~~ | ||||||
|  |  | ||||||
| .. method:: distinct() | .. method:: distinct([*fields]) | ||||||
|  |  | ||||||
| Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This | Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This | ||||||
| eliminates duplicate rows from the query results. | eliminates duplicate rows from the query results. | ||||||
| @@ -374,6 +374,43 @@ query spans multiple tables, it's possible to get duplicate results when a | |||||||
|     :meth:`values()` together, be careful when ordering by fields not in the |     :meth:`values()` together, be careful when ordering by fields not in the | ||||||
|     :meth:`values()` call. |     :meth:`values()` call. | ||||||
|  |  | ||||||
|  | .. versionadded:: 1.4 | ||||||
|  |  | ||||||
|  | The possibility to pass positional arguments (``*fields``) is new in Django 1.4. | ||||||
|  | They are names of fields to which the ``DISTINCT`` should be limited. This | ||||||
|  | translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates | ||||||
|  | duplicate rows not by comparing all fields in a row, but by comparing only the given | ||||||
|  | fields. | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |     Note that the ability to specify field names is only available in PostgreSQL. | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |     When using the ``DISTINCT ON`` functionality it is required that the columns given | ||||||
|  |     to :meth:`distinct` match the first :meth:`order_by` columns. For example ``SELECT | ||||||
|  |     DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you | ||||||
|  |     don't specify an order, then you'll get some arbitrary row. | ||||||
|  |  | ||||||
|  | Examples:: | ||||||
|  |  | ||||||
|  |     >>> Author.objects.distinct() | ||||||
|  |     [...] | ||||||
|  |  | ||||||
|  |     >>> Entry.objects.order_by('pub_date').distinct('pub_date') | ||||||
|  |     [...] | ||||||
|  |  | ||||||
|  |     >>> Entry.objects.order_by('blog').distinct('blog') | ||||||
|  |     [...] | ||||||
|  |  | ||||||
|  |     >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date') | ||||||
|  |     [...] | ||||||
|  |  | ||||||
|  |     >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date') | ||||||
|  |     [...] | ||||||
|  |  | ||||||
|  |     >>> Entry.objects.order_by('author', 'pub_date').distinct('author') | ||||||
|  |     [...] | ||||||
|  |  | ||||||
| values | values | ||||||
| ~~~~~~ | ~~~~~~ | ||||||
|  |  | ||||||
|   | |||||||
| @@ -507,6 +507,16 @@ Django 1.4 also includes several smaller improvements worth noting: | |||||||
|   ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other |   ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other | ||||||
|   cache backends. |   cache backends. | ||||||
|  |  | ||||||
|  | * Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON`` | ||||||
|  |  | ||||||
|  |   The ``distinct()`` ``Queryset`` method now accepts an optional list of model | ||||||
|  |   field names. If specified, then the ``DISTINCT`` statement is limited to these | ||||||
|  |   fields.  The PostgreSQL is the only of the database backends shipped with | ||||||
|  |   Django that supports this new functionality. | ||||||
|  |  | ||||||
|  |   For more details, see the documentation for | ||||||
|  |   :meth:`~django.db.models.query.QuerySet.distinct`. | ||||||
|  |  | ||||||
| Backwards incompatible changes in 1.4 | Backwards incompatible changes in 1.4 | ||||||
| ===================================== | ===================================== | ||||||
|  |  | ||||||
|   | |||||||
| @@ -498,6 +498,16 @@ Django 1.4 also includes several smaller improvements worth noting: | |||||||
|   ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other |   ``pickle.HIGHEST_PROTOCOL`` for better compatibility with the other | ||||||
|   cache backends. |   cache backends. | ||||||
|  |  | ||||||
|  | * Support in the ORM for generating ``SELECT`` queries containing ``DISTINCT ON`` | ||||||
|  |  | ||||||
|  |   The ``distinct()`` ``Queryset`` method now accepts an optional list of model | ||||||
|  |   field names. If specified, then the ``DISTINCT`` statement is limited to these | ||||||
|  |   fields.  The PostgreSQL is the only of the database backends shipped with | ||||||
|  |   Django that supports this new functionality. | ||||||
|  |  | ||||||
|  |   For more details, see the documentation for | ||||||
|  |   :meth:`~django.db.models.query.QuerySet.distinct`. | ||||||
|  |  | ||||||
| .. _backwards-incompatible-changes-1.4: | .. _backwards-incompatible-changes-1.4: | ||||||
|  |  | ||||||
| Backwards incompatible changes in 1.4 | Backwards incompatible changes in 1.4 | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								tests/modeltests/distinct_on_fields/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								tests/modeltests/distinct_on_fields/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | |||||||
|  | # | ||||||
							
								
								
									
										39
									
								
								tests/modeltests/distinct_on_fields/models.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								tests/modeltests/distinct_on_fields/models.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,39 @@ | |||||||
|  | from django.db import models | ||||||
|  |  | ||||||
|  | class Tag(models.Model): | ||||||
|  |     name = models.CharField(max_length=10) | ||||||
|  |     parent = models.ForeignKey('self', blank=True, null=True, | ||||||
|  |             related_name='children') | ||||||
|  |  | ||||||
|  |     class Meta: | ||||||
|  |         ordering = ['name'] | ||||||
|  |  | ||||||
|  |     def __unicode__(self): | ||||||
|  |         return self.name | ||||||
|  |  | ||||||
|  | class Celebrity(models.Model): | ||||||
|  |     name = models.CharField("Name", max_length=20) | ||||||
|  |     greatest_fan = models.ForeignKey("Fan", null=True, unique=True) | ||||||
|  |  | ||||||
|  |     def __unicode__(self): | ||||||
|  |         return self.name | ||||||
|  |  | ||||||
|  | class Fan(models.Model): | ||||||
|  |     fan_of = models.ForeignKey(Celebrity) | ||||||
|  |  | ||||||
|  | class Staff(models.Model): | ||||||
|  |     id = models.IntegerField(primary_key=True) | ||||||
|  |     name = models.CharField(max_length=50) | ||||||
|  |     organisation = models.CharField(max_length=100) | ||||||
|  |     tags = models.ManyToManyField(Tag, through='StaffTag') | ||||||
|  |     coworkers = models.ManyToManyField('self') | ||||||
|  |  | ||||||
|  |     def __unicode__(self): | ||||||
|  |         return self.name | ||||||
|  |  | ||||||
|  | class StaffTag(models.Model): | ||||||
|  |     staff = models.ForeignKey(Staff) | ||||||
|  |     tag = models.ForeignKey(Tag) | ||||||
|  |  | ||||||
|  |     def __unicode__(self): | ||||||
|  |         return u"%s -> %s" % (self.tag, self.staff) | ||||||
							
								
								
									
										116
									
								
								tests/modeltests/distinct_on_fields/tests.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								tests/modeltests/distinct_on_fields/tests.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,116 @@ | |||||||
|  | from __future__ import absolute_import, with_statement | ||||||
|  |  | ||||||
|  | from django.db.models import Max | ||||||
|  | from django.test import TestCase, skipUnlessDBFeature | ||||||
|  |  | ||||||
|  | from .models import Tag, Celebrity, Fan, Staff, StaffTag | ||||||
|  |  | ||||||
|  | class DistinctOnTests(TestCase): | ||||||
|  |     def setUp(self): | ||||||
|  |         t1 = Tag.objects.create(name='t1') | ||||||
|  |         t2 = Tag.objects.create(name='t2', parent=t1) | ||||||
|  |         t3 = Tag.objects.create(name='t3', parent=t1) | ||||||
|  |         t4 = Tag.objects.create(name='t4', parent=t3) | ||||||
|  |         t5 = Tag.objects.create(name='t5', parent=t3) | ||||||
|  |  | ||||||
|  |         p1_o1 = Staff.objects.create(id=1, name="p1", organisation="o1") | ||||||
|  |         p2_o1 = Staff.objects.create(id=2, name="p2", organisation="o1") | ||||||
|  |         p3_o1 = Staff.objects.create(id=3, name="p3", organisation="o1") | ||||||
|  |         p1_o2 = Staff.objects.create(id=4, name="p1", organisation="o2") | ||||||
|  |         p1_o1.coworkers.add(p2_o1, p3_o1) | ||||||
|  |         StaffTag.objects.create(staff=p1_o1, tag=t1) | ||||||
|  |         StaffTag.objects.create(staff=p1_o1, tag=t1) | ||||||
|  |  | ||||||
|  |         celeb1 = Celebrity.objects.create(name="c1") | ||||||
|  |         celeb2 = Celebrity.objects.create(name="c2") | ||||||
|  |  | ||||||
|  |         self.fan1 = Fan.objects.create(fan_of=celeb1) | ||||||
|  |         self.fan2 = Fan.objects.create(fan_of=celeb1) | ||||||
|  |         self.fan3 = Fan.objects.create(fan_of=celeb2) | ||||||
|  |  | ||||||
|  |     @skipUnlessDBFeature('can_distinct_on_fields') | ||||||
|  |     def test_basic_distinct_on(self): | ||||||
|  |         """QuerySet.distinct('field', ...) works""" | ||||||
|  |         # (qset, expected) tuples | ||||||
|  |         qsets = ( | ||||||
|  |             ( | ||||||
|  |                 Staff.objects.distinct().order_by('name'), | ||||||
|  |                 ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], | ||||||
|  |             ), | ||||||
|  |             ( | ||||||
|  |                 Staff.objects.distinct('name').order_by('name'), | ||||||
|  |                 ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], | ||||||
|  |             ), | ||||||
|  |             ( | ||||||
|  |                 Staff.objects.distinct('organisation').order_by('organisation', 'name'), | ||||||
|  |                 ['<Staff: p1>', '<Staff: p1>'], | ||||||
|  |             ), | ||||||
|  |             ( | ||||||
|  |                 Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'), | ||||||
|  |                 ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], | ||||||
|  |             ), | ||||||
|  |             ( | ||||||
|  |                 Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\ | ||||||
|  |                     distinct('name').order_by('name'), | ||||||
|  |                 ['<Celebrity: c1>', '<Celebrity: c2>'], | ||||||
|  |             ), | ||||||
|  |             # Does combining querysets work? | ||||||
|  |             ( | ||||||
|  |                 (Celebrity.objects.filter(fan__in=[self.fan1, self.fan2]).\ | ||||||
|  |                     distinct('name').order_by('name') | ||||||
|  |                 |Celebrity.objects.filter(fan__in=[self.fan3]).\ | ||||||
|  |                     distinct('name').order_by('name')), | ||||||
|  |                 ['<Celebrity: c1>', '<Celebrity: c2>'], | ||||||
|  |             ), | ||||||
|  |             ( | ||||||
|  |                 StaffTag.objects.distinct('staff','tag'), | ||||||
|  |                 ['<StaffTag: t1 -> p1>'], | ||||||
|  |             ), | ||||||
|  |             ( | ||||||
|  |                 Tag.objects.order_by('parent__pk', 'pk').distinct('parent'), | ||||||
|  |                 ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'], | ||||||
|  |             ), | ||||||
|  |             ( | ||||||
|  |                 StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'), | ||||||
|  |                 ['<StaffTag: t1 -> p1>'], | ||||||
|  |             ), | ||||||
|  |             # Fetch the alphabetically first coworker for each worker | ||||||
|  |             ( | ||||||
|  |                 (Staff.objects.distinct('id').order_by('id', 'coworkers__name'). | ||||||
|  |                                values_list('id', 'coworkers__name')), | ||||||
|  |                 ["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"] | ||||||
|  |             ), | ||||||
|  |         ) | ||||||
|  |         for qset, expected in qsets: | ||||||
|  |             self.assertQuerysetEqual(qset, expected) | ||||||
|  |             self.assertEqual(qset.count(), len(expected)) | ||||||
|  |  | ||||||
|  |         # Combining queries with different distinct_fields is not allowed. | ||||||
|  |         base_qs = Celebrity.objects.all() | ||||||
|  |         self.assertRaisesMessage( | ||||||
|  |             AssertionError, | ||||||
|  |             "Cannot combine queries with different distinct fields.", | ||||||
|  |             lambda: (base_qs.distinct('id') & base_qs.distinct('name')) | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         # Test join unreffing | ||||||
|  |         c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of') | ||||||
|  |         self.assertIn('OUTER JOIN', str(c1.query)) | ||||||
|  |         c2 = c1.distinct('pk') | ||||||
|  |         self.assertNotIn('OUTER JOIN', str(c2.query)) | ||||||
|  |  | ||||||
|  |     @skipUnlessDBFeature('can_distinct_on_fields') | ||||||
|  |     def test_distinct_not_implemented_checks(self): | ||||||
|  |         # distinct + annotate not allowed | ||||||
|  |         with self.assertRaises(NotImplementedError): | ||||||
|  |             Celebrity.objects.annotate(Max('id')).distinct('id')[0] | ||||||
|  |         with self.assertRaises(NotImplementedError): | ||||||
|  |             Celebrity.objects.distinct('id').annotate(Max('id'))[0] | ||||||
|  |  | ||||||
|  |         # However this check is done only when the query executes, so you | ||||||
|  |         # can use distinct() to remove the fields before execution. | ||||||
|  |         Celebrity.objects.distinct('id').annotate(Max('id')).distinct()[0] | ||||||
|  |         # distinct + aggregate not allowed | ||||||
|  |         with self.assertRaises(NotImplementedError): | ||||||
|  |             Celebrity.objects.distinct('id').aggregate(Max('id')) | ||||||
|  |  | ||||||
| @@ -209,6 +209,9 @@ class Celebrity(models.Model): | |||||||
|     name = models.CharField("Name", max_length=20) |     name = models.CharField("Name", max_length=20) | ||||||
|     greatest_fan = models.ForeignKey("Fan", null=True, unique=True) |     greatest_fan = models.ForeignKey("Fan", null=True, unique=True) | ||||||
|  |  | ||||||
|  |     def __unicode__(self): | ||||||
|  |         return self.name | ||||||
|  |  | ||||||
| class TvChef(Celebrity): | class TvChef(Celebrity): | ||||||
|     pass |     pass | ||||||
|  |  | ||||||
| @@ -343,4 +346,3 @@ class OneToOneCategory(models.Model): | |||||||
|  |  | ||||||
|     def __unicode__(self): |     def __unicode__(self): | ||||||
|         return "one2one " + self.new_name |         return "one2one " + self.new_name | ||||||
|  |  | ||||||
|   | |||||||
| @@ -234,18 +234,22 @@ class Queries1Tests(BaseQuerysetTest): | |||||||
|             ['<Item: four>', '<Item: one>'] |             ['<Item: four>', '<Item: one>'] | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|     # FIXME: This is difficult to fix and very much an edge case, so punt for |  | ||||||
|     # now.  This is related to the order_by() tests for ticket #2253, but the |  | ||||||
|     # old bug exhibited itself here (q2 was pulling too many tables into the |  | ||||||
|     # combined query with the new ordering, but only because we have evaluated |  | ||||||
|     # q2 already). |  | ||||||
|     @unittest.expectedFailure |  | ||||||
|     def test_order_by_tables(self): |     def test_order_by_tables(self): | ||||||
|         q1 = Item.objects.order_by('name') |         q1 = Item.objects.order_by('name') | ||||||
|         q2 = Item.objects.filter(id=self.i1.id) |         q2 = Item.objects.filter(id=self.i1.id) | ||||||
|         list(q2) |         list(q2) | ||||||
|         self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1) |         self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1) | ||||||
|  |  | ||||||
|  |     def test_order_by_join_unref(self): | ||||||
|  |         """ | ||||||
|  |         This test is related to the above one, testing that there aren't | ||||||
|  |         old JOINs in the query. | ||||||
|  |         """ | ||||||
|  |         qs = Celebrity.objects.order_by('greatest_fan__fan_of') | ||||||
|  |         self.assertIn('OUTER JOIN', str(qs.query)) | ||||||
|  |         qs = qs.order_by('id') | ||||||
|  |         self.assertNotIn('OUTER JOIN', str(qs.query)) | ||||||
|  |  | ||||||
|     def test_tickets_4088_4306(self): |     def test_tickets_4088_4306(self): | ||||||
|         self.assertQuerysetEqual( |         self.assertQuerysetEqual( | ||||||
|             Report.objects.filter(creator=1001), |             Report.objects.filter(creator=1001), | ||||||
| @@ -1728,7 +1732,7 @@ class ToFieldTests(TestCase): | |||||||
|  |  | ||||||
|  |  | ||||||
| class ConditionalTests(BaseQuerysetTest): | class ConditionalTests(BaseQuerysetTest): | ||||||
|     """Tests whose execution depend on dfferent environment conditions like |     """Tests whose execution depend on different environment conditions like | ||||||
|     Python version or DB backend features""" |     Python version or DB backend features""" | ||||||
|  |  | ||||||
|     def setUp(self): |     def setUp(self): | ||||||
| @@ -1739,6 +1743,7 @@ class ConditionalTests(BaseQuerysetTest): | |||||||
|         t4 = Tag.objects.create(name='t4', parent=t3) |         t4 = Tag.objects.create(name='t4', parent=t3) | ||||||
|         t5 = Tag.objects.create(name='t5', parent=t3) |         t5 = Tag.objects.create(name='t5', parent=t3) | ||||||
|  |  | ||||||
|  |  | ||||||
|     # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed |     # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed | ||||||
|     # (Python issue 1242657), so these cases return an empty list, rather than |     # (Python issue 1242657), so these cases return an empty list, rather than | ||||||
|     # raising an exception. Not a lot we can do about that, unfortunately, due to |     # raising an exception. Not a lot we can do about that, unfortunately, due to | ||||||
| @@ -1810,6 +1815,7 @@ class ConditionalTests(BaseQuerysetTest): | |||||||
|             2500 |             2500 | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class UnionTests(unittest.TestCase): | class UnionTests(unittest.TestCase): | ||||||
|     """ |     """ | ||||||
|     Tests for the union of two querysets. Bug #12252. |     Tests for the union of two querysets. Bug #12252. | ||||||
|   | |||||||
| @@ -40,9 +40,9 @@ class SelectRelatedRegressTests(TestCase): | |||||||
|         self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections], |         self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections], | ||||||
|             [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')]) |             [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')]) | ||||||
|  |  | ||||||
|         # This final query should only join seven tables (port, device and building |         # This final query should only have seven tables (port, device and building | ||||||
|         # twice each, plus connection once). |         # twice each, plus connection once). Thus, 6 joins plus the FROM table. | ||||||
|         self.assertEqual(connections.query.count_active_tables(), 7) |         self.assertEqual(str(connections.query).count(" JOIN "), 6) | ||||||
|  |  | ||||||
|  |  | ||||||
|     def test_regression_8106(self): |     def test_regression_8106(self): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user