From ddb85294159185c5bd5cae34c9ef735ff8409bfe Mon Sep 17 00:00:00 2001 From: ontowhee <82607723+ontowhee@users.noreply.github.com> Date: Sat, 15 Mar 2025 19:23:28 -0700 Subject: [PATCH] Fixed #34262 -- Added support for AnyValue for SQLite, MySQL, Oracle, and Postgresql 16+. Thanks Simon Charette for the guidance and review. Thanks Tim Schilling for the documentation review. Thanks David Wobrock for investigation and solution proposals. --- django/db/backends/base/features.py | 3 ++ django/db/backends/mysql/features.py | 14 ++--- django/db/backends/oracle/features.py | 1 + django/db/backends/postgresql/features.py | 2 + django/db/backends/sqlite3/_functions.py | 6 +++ django/db/backends/sqlite3/features.py | 1 + django/db/models/aggregates.py | 15 ++++++ docs/ref/models/querysets.txt | 54 ++++++++++++++++++++ docs/releases/6.0.txt | 4 ++ docs/topics/db/aggregation.txt | 62 +++++++++++++++++++++++ tests/aggregation/tests.py | 61 +++++++++++++++++++++- 11 files changed, 212 insertions(+), 11 deletions(-) diff --git a/django/db/backends/base/features.py b/django/db/backends/base/features.py index d3b148dc3d..87fc7204ee 100644 --- a/django/db/backends/base/features.py +++ b/django/db/backends/base/features.py @@ -266,6 +266,9 @@ class BaseDatabaseFeatures: # delimiter along with DISTINCT. supports_aggregate_distinct_multiple_argument = True + # Does the database support SQL 2023 ANY_VALUE in GROUP BY? + supports_any_value = False + # Does the backend support indexing a TextField? supports_index_on_text_field = True diff --git a/django/db/backends/mysql/features.py b/django/db/backends/mysql/features.py index d0749eb5e6..6ae4c56af1 100644 --- a/django/db/backends/mysql/features.py +++ b/django/db/backends/mysql/features.py @@ -111,16 +111,6 @@ class DatabaseFeatures(BaseDatabaseFeatures): }, } ) - if "ONLY_FULL_GROUP_BY" in self.connection.sql_mode: - skips.update( - { - "GROUP BY cannot contain nonaggregated column when " - "ONLY_FULL_GROUP_BY mode is enabled on MySQL, see #34262.": { - "aggregation.tests.AggregateTestCase." - "test_group_by_nested_expression_with_params", - }, - } - ) if self.connection.mysql_version < (8, 0, 31): skips.update( { @@ -297,3 +287,7 @@ class DatabaseFeatures(BaseDatabaseFeatures): if self.connection.mysql_is_mariadb: return "ONLY_FULL_GROUP_BY" not in self.connection.sql_mode return True + + @cached_property + def supports_any_value(self): + return not self.connection.mysql_is_mariadb diff --git a/django/db/backends/oracle/features.py b/django/db/backends/oracle/features.py index b355e7136c..fd327fabcc 100644 --- a/django/db/backends/oracle/features.py +++ b/django/db/backends/oracle/features.py @@ -61,6 +61,7 @@ class DatabaseFeatures(BaseDatabaseFeatures): END; """ supports_callproc_kwargs = True + supports_any_value = True supports_over_clause = True supports_frame_range_fixed_distance = True supports_ignore_conflicts = False diff --git a/django/db/backends/postgresql/features.py b/django/db/backends/postgresql/features.py index 74a65fdbd9..83e6b5cf7f 100644 --- a/django/db/backends/postgresql/features.py +++ b/django/db/backends/postgresql/features.py @@ -162,3 +162,5 @@ class DatabaseFeatures(BaseDatabaseFeatures): supports_nulls_distinct_unique_constraints = property( operator.attrgetter("is_postgresql_15") ) + + supports_any_value = property(operator.attrgetter("is_postgresql_16")) diff --git a/django/db/backends/sqlite3/_functions.py b/django/db/backends/sqlite3/_functions.py index 6d07d3d78b..b38e06eec2 100644 --- a/django/db/backends/sqlite3/_functions.py +++ b/django/db/backends/sqlite3/_functions.py @@ -80,6 +80,7 @@ def register(connection): connection.create_aggregate("STDDEV_SAMP", 1, StdDevSamp) connection.create_aggregate("VAR_POP", 1, VarPop) connection.create_aggregate("VAR_SAMP", 1, VarSamp) + connection.create_aggregate("ANY_VALUE", 1, AnyValue) # Some math functions are enabled by default in SQLite 3.35+. sql = "select sqlite_compileoption_used('ENABLE_MATH_FUNCTIONS')" if not connection.execute(sql).fetchone()[0]: @@ -513,3 +514,8 @@ class VarPop(ListAggregate): class VarSamp(ListAggregate): finalize = statistics.variance + + +class AnyValue(ListAggregate): + def finalize(self): + return self[0] diff --git a/django/db/backends/sqlite3/features.py b/django/db/backends/sqlite3/features.py index 1e995a87c0..8604adf40a 100644 --- a/django/db/backends/sqlite3/features.py +++ b/django/db/backends/sqlite3/features.py @@ -36,6 +36,7 @@ class DatabaseFeatures(BaseDatabaseFeatures): supports_aggregate_filter_clause = True supports_aggregate_order_by_clause = Database.sqlite_version_info >= (3, 44, 0) supports_aggregate_distinct_multiple_argument = False + supports_any_value = True order_by_nulls_first = True supports_json_field_contains = False supports_update_conflicts = True diff --git a/django/db/models/aggregates.py b/django/db/models/aggregates.py index 4174904f71..2a7ed23bc3 100644 --- a/django/db/models/aggregates.py +++ b/django/db/models/aggregates.py @@ -22,6 +22,7 @@ from django.db.models.functions.mixins import ( __all__ = [ "Aggregate", + "AnyValue", "Avg", "Count", "Max", @@ -229,6 +230,20 @@ class Aggregate(Func): return options +class AnyValue(Aggregate): + function = "ANY_VALUE" + name = "AnyValue" + arity = 1 + window_compatible = False + + def as_sql(self, compiler, connection, **extra_context): + if not connection.features.supports_any_value: + raise NotSupportedError( + "ANY_VALUE is not supported on this database backend." + ) + return super().as_sql(compiler, connection, **extra_context) + + class Avg(FixDurationInputMixin, NumericOutputFieldMixin, Aggregate): function = "AVG" name = "Avg" diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt index c7e8b974a7..ef6ceb36ad 100644 --- a/docs/ref/models/querysets.txt +++ b/docs/ref/models/querysets.txt @@ -3943,6 +3943,60 @@ when the queryset (or grouping) contains no entries. Keyword arguments that can provide extra context for the SQL generated by the aggregate. +``AnyValue`` +~~~~~~~~~~~~ + +.. versionadded:: 6.0 + +.. class:: AnyValue(expression, output_field=None, filter=None, default=None, **extra) + + Returns an arbitrary value from the non-null input values. + + * Default alias: ``__anyvalue`` + * Return type: same as input field, or ``output_field`` if supplied. If the + queryset or grouping is empty, ``default`` is returned. + + Usage example: + + .. code-block:: pycon + + >>> # Get average rating for each year along with a sample headline + >>> # from that year. + >>> from django.db.models import AnyValue, Avg, F, Q + >>> sample_headline = AnyValue("headline") + >>> Entry.objects.values( + ... pub_year=F("pub_date__year"), + ... ).annotate( + ... avg_rating=Avg("rating"), + ... sample_headline=sample_headline, + ... ) + + >>> # Get a sample headline from each year with rating greater than 4.5. + >>> sample_headline = AnyValue( + ... "headline", + ... filter=Q(rating__gt=4.5), + ... ) + >>> Entry.objects.values( + ... pub_year=F("pub_date__year"), + ... ).annotate( + ... avg_rating=Avg("rating"), + ... sample_headline=sample_headline, + ... ) + + Supported on SQLite, MySQL, Oracle, and PostgreSQL 16+. + + .. admonition:: MySQL with ``ONLY_FULL_GROUP_BY`` enabled + + When the ``ONLY_FULL_GROUP_BY`` SQL mode is enabled on MySQL it may be + necessary to use ``AnyValue`` if an aggregation includes a mix of + aggregate and non-aggregate functions. Using ``AnyValue`` allows the + non-aggregate function to be referenced in the select list when + database cannot determine that it is functionally dependent on the + columns in the `group by`_ clause. See the :ref:`aggregation + documentation ` for more details. + + .. _group by: https://dev.mysql.com/doc/refman/8.4/en/group-by-handling.html + ``Avg`` ~~~~~~~ diff --git a/docs/releases/6.0.txt b/docs/releases/6.0.txt index b950412e20..ade85a2173 100644 --- a/docs/releases/6.0.txt +++ b/docs/releases/6.0.txt @@ -212,6 +212,10 @@ Models * :class:`~django.db.models.JSONField` now supports :ref:`negative array indexing ` on SQLite. +* The new :class:`~django.db.models.AnyValue` aggregate returns an arbitrary + value from the non-null input values. This is supported on SQLite, MySQL, + Oracle, and PostgreSQL 16+. + Pagination ~~~~~~~~~~ diff --git a/docs/topics/db/aggregation.txt b/docs/topics/db/aggregation.txt index cd53f73c74..bd90c4b5fe 100644 --- a/docs/topics/db/aggregation.txt +++ b/docs/topics/db/aggregation.txt @@ -679,3 +679,65 @@ no books can be found: Under the hood, the :ref:`default ` argument is implemented by wrapping the aggregate function with :class:`~django.db.models.functions.Coalesce`. + +.. _aggregation-mysql-only-full-group-by: + +Aggregating with MySQL ``ONLY_FULL_GROUP_BY`` enabled +----------------------------------------------------- + +When using the ``values()`` clause to group query results for annotations in +MySQL with the ``ONLY_FULL_GROUP_BY`` SQL mode enabled, you may need to apply +:class:`~django.db.models.AnyValue` if the annotation includes a mix of +aggregate and non-aggregate expressions. + +Take the following example: + +.. code-block:: pycon + + >>> from django.db.models import F, Count, Greatest + >>> Book.objects.values(greatest_pages=Greatest("pages", 600)).annotate( + ... num_authors=Count("authors"), + ... pages_per_author=F("greatest_pages") / F("num_authors"), + ... ).aggregate(Avg("pages_per_author")) + +This creates groups of books based on the SQL column ``GREATEST(pages, 600)``. +One unique group consists of books with 600 pages or less, and other unique +groups will consist of books with the same pages. The ``pages_per_author`` +annotation is composed of aggregate and non-aggregate expressions, +``num_authors`` is an aggregate expression while ``greatest_page`` isn't. + +Since the grouping is based on the ``greatest_pages`` expression, MySQL may be +unable to determine that ``greatest_pages`` (used in the ``pages_per_author`` +expression) is functionally dependent on the grouped column. As a result, it +may raise an error like: + +.. code-block:: pytb + + OperationalError: (1055, "Expression #2 of SELECT list is not in GROUP BY + clause and contains nonaggregated column 'book_book.pages' which is not + functionally dependent on columns in GROUP BY clause; this is incompatible + with sql_mode=only_full_group_by") + +To avoid this, you can wrap the non-aggregate expression with +:class:`~django.db.models.AnyValue`. + +.. code-block:: pycon + + >>> from django.db.models import F, Count, Greatest + >>> Book.objects.values( + ... greatest_pages=Greatest("pages", 600), + ... ).annotate( + ... num_authors=Count("authors"), + ... pages_per_author=AnyValue(F("greatest_pages")) / F("num_authors"), + ... ).aggregate(Avg("pages_per_author")) + {'pages_per_author__avg': 532.57143333} + +Other supported databases do not encounter the ``OperationalError`` in the +example above because they can detect the functional dependency. In general, +``AnyValue`` is useful when dealing with select list columns that involve +non-aggregate functions or complex expressions not recognized by the database +as functionally dependent on the columns in the grouping clause. + +.. versionchanged:: 6.0 + + The :class:`~django.db.models.AnyValue` aggregate was added. diff --git a/tests/aggregation/tests.py b/tests/aggregation/tests.py index a8ce6ed1d8..db61716e2e 100644 --- a/tests/aggregation/tests.py +++ b/tests/aggregation/tests.py @@ -6,6 +6,7 @@ from decimal import Decimal from django.core.exceptions import FieldError from django.db import NotSupportedError, connection from django.db.models import ( + AnyValue, Avg, Case, CharField, @@ -1662,6 +1663,10 @@ class AggregateTestCase(TestCase): self.assertEqual(dict(has_long_books_breakdown), {True: 2, False: 3}) def test_group_by_nested_expression_with_params(self): + greatest_pages_param = "greatest_pages" + if connection.vendor == "mysql" and connection.features.supports_any_value: + greatest_pages_param = AnyValue("greatest_pages") + books_qs = ( Book.objects.annotate(greatest_pages=Greatest("pages", Value(600))) .values( @@ -1669,12 +1674,66 @@ class AggregateTestCase(TestCase): ) .annotate( min_pages=Min("pages"), - least=Least("min_pages", "greatest_pages"), + least=Least("min_pages", greatest_pages_param), ) .values_list("least", flat=True) ) self.assertCountEqual(books_qs, [300, 946, 1132]) + @skipUnlessDBFeature("supports_any_value") + def test_any_value(self): + books_qs = ( + Book.objects.values(greatest_pages=Greatest("pages", 600)) + .annotate( + pubdate_year=AnyValue("pubdate__year"), + ) + .values_list("pubdate_year", flat=True) + .order_by("pubdate_year") + ) + self.assertCountEqual(books_qs[0:2], [1991, 1995]) + self.assertIn(books_qs[2], [2007, 2008]) + + @skipUnlessDBFeature("supports_any_value") + def test_any_value_filter(self): + books_qs = ( + Book.objects.values(greatest_pages=Greatest("pages", 600)) + .annotate( + pubdate_year=AnyValue("pubdate__year", filter=Q(rating__lte=4.5)), + ) + .values_list("pubdate_year", flat=True) + ) + self.assertCountEqual(books_qs, [2007, 1995, None]) + + @skipUnlessDBFeature("supports_any_value") + def test_any_value_aggregate_clause(self): + books_qs = ( + Book.objects.values(greatest_pages=Greatest("pages", 600)) + .annotate( + num_authors=Count("authors"), + pages_per_author=( + AnyValue("greatest_pages") / (Cast("num_authors", FloatField())) + ), + ) + .values_list("pages_per_author", flat=True) + .order_by("pages_per_author") + ) + self.assertAlmostEqual(books_qs[0], 600 / 7, places=4) + self.assertAlmostEqual(books_qs[1], 1132 / 2, places=4) + self.assertAlmostEqual(books_qs[2], 946 / 1, places=4) + + aggregate_qs = books_qs.aggregate(Avg("pages_per_author")) + self.assertAlmostEqual( + aggregate_qs["pages_per_author__avg"], + ((600 / 7) + (1132 / 2) + (946 / 1)) / 3, + places=4, + ) + + @skipIfDBFeature("supports_any_value") + def test_any_value_not_supported(self): + message = "ANY_VALUE is not supported on this database backend." + with self.assertRaisesMessage(NotSupportedError, message): + Book.objects.aggregate(AnyValue("rating")) + @skipUnlessDBFeature("supports_subqueries_in_group_by") def test_aggregation_subquery_annotation_related_field(self): publisher = Publisher.objects.create(name=self.a9.name, num_awards=2)