From e097e8a12f21a4e92594830f1ad1942b31916d0f Mon Sep 17 00:00:00 2001 From: Adam Johnson Date: Wed, 29 Nov 2023 09:35:34 +0000 Subject: [PATCH] Fixed #28586 -- Added model field fetch modes. May your database queries be much reduced with minimal effort. co-authored-by: Andreas Pelme co-authored-by: Simon Charette co-authored-by: Jacob Walls --- django/contrib/contenttypes/fields.py | 16 +- django/core/exceptions.py | 6 + django/db/models/__init__.py | 4 + django/db/models/base.py | 25 +++- django/db/models/fetch_modes.py | 52 +++++++ .../db/models/fields/related_descriptors.py | 67 ++++++--- django/db/models/query.py | 33 ++++- django/db/models/query_utils.py | 17 ++- docs/ref/exceptions.txt | 10 ++ docs/ref/models/instances.txt | 8 +- docs/ref/models/querysets.txt | 60 +++++--- docs/releases/6.1.txt | 45 ++++++ docs/spelling_wordlist | 1 + docs/topics/db/fetch-modes.txt | 138 ++++++++++++++++++ docs/topics/db/index.txt | 1 + docs/topics/db/optimization.txt | 48 ++++-- docs/topics/db/queries.txt | 6 + tests/basic/tests.py | 1 + tests/defer/tests.py | 68 ++++++++- tests/generic_relations/tests.py | 43 +++++- tests/many_to_one/tests.py | 27 +++- tests/one_to_one/tests.py | 38 +++++ tests/prefetch_related/tests.py | 5 + tests/raw_query/tests.py | 36 ++++- 24 files changed, 682 insertions(+), 73 deletions(-) create mode 100644 django/db/models/fetch_modes.py create mode 100644 docs/topics/db/fetch-modes.txt diff --git a/django/contrib/contenttypes/fields.py b/django/contrib/contenttypes/fields.py index f98dda1255..aa41eab370 100644 --- a/django/contrib/contenttypes/fields.py +++ b/django/contrib/contenttypes/fields.py @@ -16,6 +16,7 @@ from django.db.models.fields.related import ( ReverseManyToOneDescriptor, lazy_related_operation, ) +from django.db.models.query import prefetch_related_objects from django.db.models.query_utils import PathInfo from django.db.models.sql import AND from django.db.models.sql.where import WhereNode @@ -253,6 +254,15 @@ class 
GenericForeignKeyDescriptor: return rel_obj else: rel_obj = None + + instance._state.fetch_mode.fetch(self, instance) + return self.field.get_cached_value(instance) + + def fetch_one(self, instance): + f = self.field.model._meta.get_field(self.field.ct_field) + ct_id = getattr(instance, f.attname, None) + pk_val = getattr(instance, self.field.fk_field) + rel_obj = None if ct_id is not None: ct = self.field.get_content_type(id=ct_id, using=instance._state.db) try: @@ -262,7 +272,11 @@ class GenericForeignKeyDescriptor: except ObjectDoesNotExist: pass self.field.set_cached_value(instance, rel_obj) - return rel_obj + + def fetch_many(self, instances): + is_cached = self.field.is_cached + missing_instances = [i for i in instances if not is_cached(i)] + return prefetch_related_objects(missing_instances, self.field.name) def __set__(self, instance, value): ct = None diff --git a/django/core/exceptions.py b/django/core/exceptions.py index cbc80bd78f..0e24f6cb18 100644 --- a/django/core/exceptions.py +++ b/django/core/exceptions.py @@ -132,6 +132,12 @@ class FieldError(Exception): pass +class FieldFetchBlocked(FieldError): + """On-demand fetching of a model field blocked.""" + + pass + + NON_FIELD_ERRORS = "__all__" diff --git a/django/db/models/__init__.py b/django/db/models/__init__.py index ec54b65240..f15ddecfaa 100644 --- a/django/db/models/__init__.py +++ b/django/db/models/__init__.py @@ -36,6 +36,7 @@ from django.db.models.expressions import ( WindowFrame, WindowFrameExclusion, ) +from django.db.models.fetch_modes import FETCH_ONE, FETCH_PEERS, RAISE from django.db.models.fields import * # NOQA from django.db.models.fields import __all__ as fields_all from django.db.models.fields.composite import CompositePrimaryKey @@ -105,6 +106,9 @@ __all__ += [ "GeneratedField", "JSONField", "OrderWrt", + "FETCH_ONE", + "FETCH_PEERS", + "RAISE", "Lookup", "Transform", "Manager", diff --git a/django/db/models/base.py b/django/db/models/base.py index fd51052d01..b92a198660 100644 
--- a/django/db/models/base.py +++ b/django/db/models/base.py @@ -32,6 +32,7 @@ from django.db.models import NOT_PROVIDED, ExpressionWrapper, IntegerField, Max, from django.db.models.constants import LOOKUP_SEP from django.db.models.deletion import CASCADE, Collector from django.db.models.expressions import DatabaseDefault +from django.db.models.fetch_modes import FETCH_ONE from django.db.models.fields.composite import CompositePrimaryKey from django.db.models.fields.related import ( ForeignObjectRel, @@ -466,6 +467,14 @@ class ModelStateFieldsCacheDescriptor: return res +class ModelStateFetchModeDescriptor: + def __get__(self, instance, cls=None): + if instance is None: + return self + res = instance.fetch_mode = FETCH_ONE + return res + + class ModelState: """Store model instance state.""" @@ -476,6 +485,14 @@ class ModelState: # on the actual save. adding = True fields_cache = ModelStateFieldsCacheDescriptor() + fetch_mode = ModelStateFetchModeDescriptor() + peers = () + + def __getstate__(self): + state = self.__dict__.copy() + # Weak references can't be pickled. + state.pop("peers", None) + return state class Model(AltersData, metaclass=ModelBase): @@ -595,7 +612,7 @@ class Model(AltersData, metaclass=ModelBase): post_init.send(sender=cls, instance=self) @classmethod - def from_db(cls, db, field_names, values): + def from_db(cls, db, field_names, values, *, fetch_mode=None): if len(values) != len(cls._meta.concrete_fields): values_iter = iter(values) values = [ @@ -605,6 +622,8 @@ class Model(AltersData, metaclass=ModelBase): new = cls(*values) new._state.adding = False new._state.db = db + if fetch_mode is not None: + new._state.fetch_mode = fetch_mode return new def __repr__(self): @@ -714,8 +733,8 @@ class Model(AltersData, metaclass=ModelBase): should be an iterable of field attnames. If fields is None, then all non-deferred fields are reloaded. - When accessing deferred fields of an instance, the deferred loading - of the field will call this method. 
+ When fetching deferred fields for a single instance (the FETCH_ONE + fetch mode), the deferred loading uses this method. """ if fields is None: self._prefetched_objects_cache = {} diff --git a/django/db/models/fetch_modes.py b/django/db/models/fetch_modes.py new file mode 100644 index 0000000000..a22ccd8a23 --- /dev/null +++ b/django/db/models/fetch_modes.py @@ -0,0 +1,52 @@ +from django.core.exceptions import FieldFetchBlocked + + +class FetchMode: + __slots__ = () + + track_peers = False + + def fetch(self, fetcher, instance): + raise NotImplementedError("Subclasses must implement this method.") + + +class FetchOne(FetchMode): + __slots__ = () + + def fetch(self, fetcher, instance): + fetcher.fetch_one(instance) + + +FETCH_ONE = FetchOne() + + +class FetchPeers(FetchMode): + __slots__ = () + + track_peers = True + + def fetch(self, fetcher, instance): + instances = [ + peer + for peer_weakref in instance._state.peers + if (peer := peer_weakref()) is not None + ] + if len(instances) > 1: + fetcher.fetch_many(instances) + else: + fetcher.fetch_one(instance) + + +FETCH_PEERS = FetchPeers() + + +class Raise(FetchMode): + __slots__ = () + + def fetch(self, fetcher, instance): + klass = instance.__class__.__qualname__ + field_name = fetcher.field.name + raise FieldFetchBlocked(f"Fetching of {klass}.{field_name} blocked.") from None + + +RAISE = Raise() diff --git a/django/db/models/fields/related_descriptors.py b/django/db/models/fields/related_descriptors.py index 3e2150e0f6..2c8e59f1d9 100644 --- a/django/db/models/fields/related_descriptors.py +++ b/django/db/models/fields/related_descriptors.py @@ -78,7 +78,7 @@ from django.db.models.expressions import ColPairs from django.db.models.fields.tuple_lookups import TupleIn from django.db.models.functions import RowNumber from django.db.models.lookups import GreaterThan, LessThanOrEqual -from django.db.models.query import QuerySet +from django.db.models.query import QuerySet, prefetch_related_objects from 
django.db.models.query_utils import DeferredAttribute from django.db.models.utils import AltersData, resolve_callables from django.utils.functional import cached_property @@ -254,13 +254,9 @@ class ForwardManyToOneDescriptor: break if rel_obj is None and has_value: - rel_obj = self.get_object(instance) - remote_field = self.field.remote_field - # If this is a one-to-one relation, set the reverse accessor - # cache on the related object to the current instance to avoid - # an extra SQL query if it's accessed later on. - if not remote_field.multiple: - remote_field.set_cached_value(rel_obj, instance) + instance._state.fetch_mode.fetch(self, instance) + return self.field.get_cached_value(instance) + self.field.set_cached_value(instance, rel_obj) if rel_obj is None and not self.field.null: @@ -270,6 +266,21 @@ class ForwardManyToOneDescriptor: else: return rel_obj + def fetch_one(self, instance): + rel_obj = self.get_object(instance) + self.field.set_cached_value(instance, rel_obj) + # If this is a one-to-one relation, set the reverse accessor cache on + # the related object to the current instance to avoid an extra SQL + # query if it's accessed later on. + remote_field = self.field.remote_field + if not remote_field.multiple: + remote_field.set_cached_value(rel_obj, instance) + + def fetch_many(self, instances): + is_cached = self.is_cached + missing_instances = [i for i in instances if not is_cached(i)] + prefetch_related_objects(missing_instances, self.field.name) + def __set__(self, instance, value): """ Set the related instance through the forward relation. 
@@ -504,16 +515,8 @@ class ReverseOneToOneDescriptor: if not instance._is_pk_set(): rel_obj = None else: - filter_args = self.related.field.get_forward_related_filter(instance) - try: - rel_obj = self.get_queryset(instance=instance).get(**filter_args) - except self.related.related_model.DoesNotExist: - rel_obj = None - else: - # Set the forward accessor cache on the related object to - # the current instance to avoid an extra SQL query if it's - # accessed later on. - self.related.field.set_cached_value(rel_obj, instance) + instance._state.fetch_mode.fetch(self, instance) + rel_obj = self.related.get_cached_value(instance) self.related.set_cached_value(instance, rel_obj) if rel_obj is None: @@ -524,6 +527,34 @@ class ReverseOneToOneDescriptor: else: return rel_obj + @property + def field(self): + """ + Add compatibility with the fetcher protocol. While self.related is not + a field but a OneToOneRel, it quacks enough like a field to work. + """ + return self.related + + def fetch_one(self, instance): + # Kept for backwards compatibility with overridden + # get_forward_related_filter() + filter_args = self.related.field.get_forward_related_filter(instance) + try: + rel_obj = self.get_queryset(instance=instance).get(**filter_args) + except self.related.related_model.DoesNotExist: + rel_obj = None + else: + self.related.field.set_cached_value(rel_obj, instance) + self.related.set_cached_value(instance, rel_obj) + + def fetch_many(self, instances): + is_cached = self.is_cached + missing_instances = [i for i in instances if not is_cached(i)] + prefetch_related_objects( + missing_instances, + self.related.get_accessor_name(), + ) + def __set__(self, instance, value): """ Set the related instance through the reverse relation. 
diff --git a/django/db/models/query.py b/django/db/models/query.py index 39cc9b6cb3..0811b90b5e 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -8,6 +8,7 @@ import warnings from contextlib import nullcontext from functools import reduce from itertools import chain, islice +from weakref import ref as weak_ref from asgiref.sync import sync_to_async @@ -26,6 +27,7 @@ from django.db.models import AutoField, DateField, DateTimeField, Field, Max, sq from django.db.models.constants import LOOKUP_SEP, OnConflict from django.db.models.deletion import Collector from django.db.models.expressions import Case, DatabaseDefault, F, Value, When +from django.db.models.fetch_modes import FETCH_ONE from django.db.models.functions import Cast, Trunc from django.db.models.query_utils import FilteredRelation, Q from django.db.models.sql.constants import GET_ITERATOR_CHUNK_SIZE, ROW_COUNT @@ -122,10 +124,18 @@ class ModelIterable(BaseIterable): ) for field, related_objs in queryset._known_related_objects.items() ] + fetch_mode = queryset._fetch_mode + peers = [] for row in compiler.results_iter(results): obj = model_cls.from_db( - db, init_list, row[model_fields_start:model_fields_end] + db, + init_list, + row[model_fields_start:model_fields_end], + fetch_mode=fetch_mode, ) + if fetch_mode.track_peers: + peers.append(weak_ref(obj)) + obj._state.peers = peers for rel_populator in related_populators: rel_populator.populate(row, obj) if annotation_col_map: @@ -183,10 +193,17 @@ class RawModelIterable(BaseIterable): query_iterator = compiler.composite_fields_to_tuples( query_iterator, cols ) + fetch_mode = self.queryset._fetch_mode + peers = [] for values in query_iterator: # Associate fields to values model_init_values = [values[pos] for pos in model_init_pos] - instance = model_cls.from_db(db, model_init_names, model_init_values) + instance = model_cls.from_db( + db, model_init_names, model_init_values, fetch_mode=fetch_mode + ) + if fetch_mode.track_peers: + 
peers.append(weak_ref(instance)) + instance._state.peers = peers if annotation_fields: for column, pos in annotation_fields: setattr(instance, column, values[pos]) @@ -293,6 +310,7 @@ class QuerySet(AltersData): self._prefetch_done = False self._known_related_objects = {} # {rel_field: {pk: rel_obj}} self._iterable_class = ModelIterable + self._fetch_mode = FETCH_ONE self._fields = None self._defer_next_filter = False self._deferred_filter = None @@ -1442,6 +1460,7 @@ class QuerySet(AltersData): params=params, translations=translations, using=using, + fetch_mode=self._fetch_mode, ) qs._prefetch_related_lookups = self._prefetch_related_lookups[:] return qs @@ -1913,6 +1932,12 @@ class QuerySet(AltersData): clone._db = alias return clone + def fetch_mode(self, fetch_mode): + """Set the fetch mode for the QuerySet.""" + clone = self._chain() + clone._fetch_mode = fetch_mode + return clone + ################################### # PUBLIC INTROSPECTION ATTRIBUTES # ################################### @@ -2051,6 +2076,7 @@ class QuerySet(AltersData): c._prefetch_related_lookups = self._prefetch_related_lookups[:] c._known_related_objects = self._known_related_objects c._iterable_class = self._iterable_class + c._fetch_mode = self._fetch_mode c._fields = self._fields return c @@ -2186,6 +2212,7 @@ class RawQuerySet: translations=None, using=None, hints=None, + fetch_mode=FETCH_ONE, ): self.raw_query = raw_query self.model = model @@ -2197,6 +2224,7 @@ class RawQuerySet: self._result_cache = None self._prefetch_related_lookups = () self._prefetch_done = False + self._fetch_mode = fetch_mode def resolve_model_init_order(self): """Resolve the init field names and value positions.""" @@ -2295,6 +2323,7 @@ class RawQuerySet: params=self.params, translations=self.translations, using=alias, + fetch_mode=self._fetch_mode, ) @cached_property diff --git a/django/db/models/query_utils.py b/django/db/models/query_utils.py index c383b80640..23d543211a 100644 --- 
a/django/db/models/query_utils.py +++ b/django/db/models/query_utils.py @@ -264,7 +264,8 @@ class DeferredAttribute: f"Cannot retrieve deferred field {field_name!r} " "from an unsaved model." ) - instance.refresh_from_db(fields=[field_name]) + + instance._state.fetch_mode.fetch(self, instance) else: data[field_name] = val return data[field_name] @@ -281,6 +282,20 @@ class DeferredAttribute: return getattr(instance, link_field.attname) return None + def fetch_one(self, instance): + instance.refresh_from_db(fields=[self.field.attname]) + + def fetch_many(self, instances): + attname = self.field.attname + db = instances[0]._state.db + value_by_pk = ( + self.field.model._base_manager.using(db) + .values_list(attname) + .in_bulk({i.pk for i in instances}) + ) + for instance in instances: + setattr(instance, attname, value_by_pk[instance.pk]) + class class_or_instance_method: """ diff --git a/docs/ref/exceptions.txt b/docs/ref/exceptions.txt index bbd959e95d..93c6ec4203 100644 --- a/docs/ref/exceptions.txt +++ b/docs/ref/exceptions.txt @@ -165,6 +165,16 @@ Django core exception classes are defined in ``django.core.exceptions``. - A field name is invalid - A query contains invalid order_by arguments +``FieldFetchBlocked`` +--------------------- + +.. versionadded:: 6.1 + +.. exception:: FieldFetchBlocked + + Raised when a field would be fetched on-demand and the + :attr:`~django.db.models.RAISE` fetch mode is active. + ``ValidationError`` ------------------- diff --git a/docs/ref/models/instances.txt b/docs/ref/models/instances.txt index c8cf5957ba..2ce8dc4a36 100644 --- a/docs/ref/models/instances.txt +++ b/docs/ref/models/instances.txt @@ -180,10 +180,10 @@ update, you could write a test similar to this:: obj.refresh_from_db() self.assertEqual(obj.val, 2) -Note that when deferred fields are accessed, the loading of the deferred -field's value happens through this method. Thus it is possible to customize -the way deferred loading happens. 
The example below shows how one can reload -all of the instance's fields when a deferred field is reloaded:: +When a deferred field is loaded on-demand for a single model instance, the +loading happens through this method. Thus it is possible to customize the way +this loading happens. The example below shows how one can reload all of the +instance's fields when a deferred field is loaded on-demand:: class ExampleModel(models.Model): def refresh_from_db(self, using=None, fields=None, **kwargs): diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt index f290970d2c..3840a2f97e 100644 --- a/docs/ref/models/querysets.txt +++ b/docs/ref/models/querysets.txt @@ -1022,15 +1022,38 @@ Uses SQL's ``EXCEPT`` operator to keep only elements present in the See :meth:`union` for some restrictions. +``fetch_mode()`` +~~~~~~~~~~~~~~~~ + +.. versionadded:: 6.1 + +.. method:: fetch_mode(mode) + +Returns a ``QuerySet`` that sets the given fetch mode for all model instances +created by this ``QuerySet``. The fetch mode controls on-demand loading of +fields when they are accessed, such as for foreign keys and deferred fields. +For example, to use the :attr:`~django.db.models.FETCH_PEERS` mode to +batch-load all related objects on first access: + +.. code-block:: python + + from django.db import models + + books = Book.objects.fetch_mode(models.FETCH_PEERS) + +See more in the :doc:`fetch mode topic guide </topics/db/fetch-modes>`. + ``select_related()`` ~~~~~~~~~~~~~~~~~~~~ .. method:: select_related(*fields) -Returns a ``QuerySet`` that will "follow" foreign-key relationships, selecting -additional related-object data when it executes its query. This is a -performance booster which results in a single more complex query but means -later use of foreign-key relationships won't require database queries. +Returns a ``QuerySet`` that will join in the named foreign-key relationships, +selecting additional related objects when it executes its query.
This method +can be a performance booster, fetching data ahead of time rather than +triggering on-demand loading through the model instances' +:doc:`fetch mode </topics/db/fetch-modes>`, at the cost of a more complex +initial query. The following examples illustrate the difference between plain lookups and ``select_related()`` lookups. Here's standard lookup:: @@ -1050,20 +1073,8 @@ And here's ``select_related`` lookup:: # in the previous query. b = e.blog -You can use ``select_related()`` with any queryset of objects:: - - from django.utils import timezone - - # Find all the blogs with entries scheduled to be published in the future. - blogs = set() - - for e in Entry.objects.filter(pub_date__gt=timezone.now()).select_related("blog"): - # Without select_related(), this would make a database query for each - # loop iteration in order to fetch the related blog for each entry. - blogs.add(e.blog) - -The order of ``filter()`` and ``select_related()`` chaining isn't important. -These querysets are equivalent:: +You can use ``select_related()`` with any queryset. The order of chaining with +other methods isn't important. For example, these querysets are equivalent:: Entry.objects.filter(pub_date__gt=timezone.now()).select_related("blog") Entry.objects.select_related("blog").filter(pub_date__gt=timezone.now()) @@ -1141,12 +1152,15 @@ that is that ``select_related('foo', 'bar')`` is equivalent to .. method:: prefetch_related(*lookups) -Returns a ``QuerySet`` that will automatically retrieve, in a single batch, -related objects for each of the specified lookups. +Returns a ``QuerySet`` that will automatically retrieve the given lookups, each +in one extra batch query. Prefetching is a way to optimize database access +when you know you'll be accessing related objects later, so you can avoid +triggering the on-demand loading behavior of the model instances' +:doc:`fetch mode </topics/db/fetch-modes>`.
-This has a similar purpose to ``select_related``, in that both are designed to -stop the deluge of database queries that is caused by accessing related -objects, but the strategy is quite different. +This method has a similar purpose to :meth:`select_related`, in that both are +designed to eagerly fetch related objects. However, they work in different +ways. ``select_related`` works by creating an SQL join and including the fields of the related object in the ``SELECT`` statement. For this reason, diff --git a/docs/releases/6.1.txt b/docs/releases/6.1.txt index 5e852785d9..80470dbcd6 100644 --- a/docs/releases/6.1.txt +++ b/docs/releases/6.1.txt @@ -26,6 +26,51 @@ only officially support, the latest release of each series. What's new in Django 6.1 ======================== +Model field fetch modes +----------------------- + +The on-demand fetching behavior of model fields is now configurable with +:doc:`fetch modes </topics/db/fetch-modes>`. These modes allow you to control +how Django fetches data from the database when an unfetched field is accessed. + +Django provides three fetch modes: + +1. ``FETCH_ONE``, the default, fetches the missing field for the current + instance only. This mode represents Django's existing behavior. + +2. ``FETCH_PEERS`` fetches a missing field for all instances that came from + the same :class:`~django.db.models.query.QuerySet`. + + This mode works like an on-demand ``prefetch_related()``. It can reduce most + cases of the "N+1 queries problem" to two queries without any work to + maintain a list of fields to prefetch. + +3. ``RAISE`` raises a :exc:`~django.core.exceptions.FieldFetchBlocked` + exception. + + This mode can prevent unintentional queries in performance-critical + sections of code. + +Use the new method :meth:`.QuerySet.fetch_mode` to set the fetch mode for model +instances fetched by the ``QuerySet``: + +..
code-block:: python + + from django.db import models + + books = Book.objects.fetch_mode(models.FETCH_PEERS) + for book in books: + print(book.author.name) + +Despite the loop accessing the ``author`` foreign key on each instance, the +``FETCH_PEERS`` fetch mode will make the above example perform only two +queries: + +1. Fetch all books. +2. Fetch associated authors. + +See :doc:`fetch modes </topics/db/fetch-modes>` for more details. + Minor features -------------- diff --git a/docs/spelling_wordlist b/docs/spelling_wordlist index 2898f85d5b..b35c94fc10 100644 --- a/docs/spelling_wordlist +++ b/docs/spelling_wordlist @@ -535,6 +535,7 @@ unencrypted unescape unescaped unevaluated +unfetched unglamorous ungrouped unhandled diff --git a/docs/topics/db/fetch-modes.txt b/docs/topics/db/fetch-modes.txt new file mode 100644 index 0000000000..e76bb28a59 --- /dev/null +++ b/docs/topics/db/fetch-modes.txt @@ -0,0 +1,138 @@ +=========== +Fetch modes +=========== + +.. versionadded:: 6.1 + +.. module:: django.db.models.fetch_modes + +.. currentmodule:: django.db.models + +When accessing model fields that were not loaded as part of the original query, +Django will fetch that field's data from the database. You can customize the +behavior of this fetching with a **fetch mode**, making it more efficient or +even blocking it. + +Use :meth:`.QuerySet.fetch_mode` to set the fetch mode for model +instances fetched by a ``QuerySet``: + +.. code-block:: python + + from django.db import models + + books = Book.objects.fetch_mode(models.FETCH_PEERS) + +Fetch modes apply to: + +* :class:`~django.db.models.ForeignKey` fields +* :class:`~django.db.models.OneToOneField` fields and their reverse accessors +* Fields deferred with :meth:`.QuerySet.defer` or :meth:`.QuerySet.only` +* :ref:`generic-relations` + +Available modes +=============== + +.. admonition:: Referencing fetch modes + + Fetch modes are defined in ``django.db.models.fetch_modes``, but for + convenience they're imported into :mod:`django.db.models`.
The standard + convention is to use ``from django.db import models`` and refer to the + fetch modes as ``models.<mode>``. + +Django provides three fetch modes. We'll explain them below using these models: + +.. code-block:: python + + from django.db import models + + + class Author(models.Model): ... + + + class Book(models.Model): + author = models.ForeignKey(Author, on_delete=models.CASCADE) + ... + +…and this loop: + +.. code-block:: python + + for book in books: + print(book.author.name) + +…where ``books`` is a ``QuerySet`` of ``Book`` instances using some fetch mode. + +.. attribute:: FETCH_ONE + +Fetches the missing field for the current instance only. This is the default +mode. + +Using ``FETCH_ONE`` for the above example would use: + +* 1 query to fetch ``books`` +* N queries, where N is the number of books, to fetch the missing ``author`` + field + +…for a total of 1+N queries. This query pattern is known as the "N+1 queries +problem" because it often leads to performance issues when N is large. + +.. attribute:: FETCH_PEERS + +Fetches the missing field for the current instance and its "peers"—instances +that came from the same initial ``QuerySet``. The behavior of this mode is +based on the assumption that if you need a field for one instance, you probably +need it for all instances in the same batch, since you'll likely process them +all identically. + +Using ``FETCH_PEERS`` for the above example would use: + +* 1 query to fetch ``books`` +* 1 query to fetch all missing ``author`` fields for the batch of books + +…for a total of 2 queries. The batch query makes this mode a lot more efficient +than ``FETCH_ONE`` and is similar to an on-demand call to +:meth:`.QuerySet.prefetch_related` or +:func:`~django.db.models.prefetch_related_objects`. Using ``FETCH_PEERS`` can +reduce most cases of the "N+1 queries problem" to two queries without +much effort.
+ +The "peer" instances are tracked in a list of weak references, to avoid +memory leaks where some peer instances are discarded. + +.. attribute:: RAISE + +Raises a :exc:`~django.core.exceptions.FieldFetchBlocked` exception. + +Using ``RAISE`` for the above example would raise an exception when +``book.author`` is accessed, like: + +.. code-block:: python + + FieldFetchBlocked("Fetching of Book.author blocked.") + +This mode can prevent unintentional queries in performance-critical +sections of code. + +.. _fetch-modes-custom-manager: + +Make a fetch mode the default for a model class +=============================================== + +Set the default fetch mode for a model class with a +:ref:`custom manager <custom-managers>` that overrides ``get_queryset()``: + +.. code-block:: python + + from django.db import models + + + class BookManager(models.Manager): + def get_queryset(self): + return super().get_queryset().fetch_mode(models.FETCH_PEERS) + + + class Book(models.Model): + title = models.TextField() + author = models.ForeignKey("Author", on_delete=models.CASCADE) + + objects = BookManager() diff --git a/docs/topics/db/index.txt b/docs/topics/db/index.txt index 67a71fd820..6caf9f15e9 100644 --- a/docs/topics/db/index.txt +++ b/docs/topics/db/index.txt @@ -13,6 +13,7 @@ Generally, each model maps to a single database table. models queries + fetch-modes aggregation search managers diff --git a/docs/topics/db/optimization.txt b/docs/topics/db/optimization.txt index bb70efa362..3be0bd2cb5 100644 --- a/docs/topics/db/optimization.txt +++ b/docs/topics/db/optimization.txt @@ -196,28 +196,46 @@ thousands of records are returned. The penalty will be compounded if the database lives on a separate server, where network overhead and latency also play a factor.
-Retrieve everything at once if you know you will need it -======================================================== +Retrieve related objects efficiently +==================================== -Hitting the database multiple times for different parts of a single 'set' of -data that you will need all parts of is, in general, less efficient than -retrieving it all in one query. This is particularly important if you have a -query that is executed in a loop, and could therefore end up doing many -database queries, when only one was needed. So: +Generally, accessing the database multiple times to retrieve different parts +of a single "set" of data is less efficient than retrieving it all in one +query. This is particularly important if you have a query that is executed in a +loop, and could therefore end up doing many database queries, when only one +is needed. Below are some techniques to combine queries for efficiency. + +Use the ``FETCH_PEERS`` fetch mode +---------------------------------- + +Use the :attr:`~django.db.models.FETCH_PEERS` fetch mode to make on-demand +field access more efficient with bulk-fetching. Enable it for all usage of +your models :ref:`with a custom manager <fetch-modes-custom-manager>`. + +Using this fetch mode is easier than declaring fields to fetch with +:meth:`~django.db.models.query.QuerySet.select_related` or +:meth:`~django.db.models.query.QuerySet.prefetch_related`, especially when it's +hard to predict which fields will be accessed. Use ``QuerySet.select_related()`` and ``prefetch_related()`` ------------------------------------------------------------ -Understand :meth:`~django.db.models.query.QuerySet.select_related` and -:meth:`~django.db.models.query.QuerySet.prefetch_related` thoroughly, and use -them: +When the :attr:`~django.db.models.FETCH_PEERS` fetch mode is not appropriate or +efficient enough, use :meth:`~django.db.models.query.QuerySet.select_related` +and :meth:`~django.db.models.query.QuerySet.prefetch_related`.
Understand their +documentation thoroughly and apply them where needed. -* in :doc:`managers and default managers </topics/db/managers>` where - appropriate. Be aware when your manager is and is not used; sometimes this is - tricky so don't make assumptions. +It may be useful to apply these methods in :doc:`managers and default managers +</topics/db/managers>`. Be aware when your manager is and is not used; +sometimes this is tricky so don't make assumptions. -* in view code or other layers, possibly making use of - :func:`~django.db.models.prefetch_related_objects` where needed. +Use ``prefetch_related_objects()`` +---------------------------------- + +Where :meth:`~django.db.models.query.QuerySet.prefetch_related` would be useful +after the queryset has been evaluated, use +:func:`~django.db.models.prefetch_related_objects` to execute an extra +prefetch. Don't retrieve things you don't need ==================================== diff --git a/docs/topics/db/queries.txt b/docs/topics/db/queries.txt index d24505b039..ed1d3ea9ed 100644 --- a/docs/topics/db/queries.txt +++ b/docs/topics/db/queries.txt @@ -1702,6 +1702,12 @@ the link from the related model to the model that defines the relationship. For example, a ``Blog`` object ``b`` has a manager that returns all related ``Entry`` objects in the ``entry_set`` attribute: ``b.entry_set.all()``. +These accessors may be prefetched by the ``QuerySet`` methods +:meth:`~django.db.models.query.QuerySet.select_related` or +:meth:`~django.db.models.query.QuerySet.prefetch_related`. If not prefetched, +access will trigger an on-demand fetch through the model's +:doc:`fetch mode </topics/db/fetch-modes>`. + All examples in this section use the sample ``Blog``, ``Author`` and ``Entry`` models defined at the top of this page.
diff --git a/tests/basic/tests.py b/tests/basic/tests.py index 38d7d2a3d6..89aef16aef 100644 --- a/tests/basic/tests.py +++ b/tests/basic/tests.py @@ -807,6 +807,7 @@ class ManagerTest(SimpleTestCase): "alatest", "aupdate", "aupdate_or_create", + "fetch_mode", ] def test_manager_methods(self): diff --git a/tests/defer/tests.py b/tests/defer/tests.py index c0968080b1..29c63c566a 100644 --- a/tests/defer/tests.py +++ b/tests/defer/tests.py @@ -1,4 +1,5 @@ -from django.core.exceptions import FieldDoesNotExist, FieldError +from django.core.exceptions import FieldDoesNotExist, FieldError, FieldFetchBlocked +from django.db.models import FETCH_PEERS, RAISE from django.test import SimpleTestCase, TestCase from .models import ( @@ -29,6 +30,7 @@ class DeferTests(AssertionMixin, TestCase): def setUpTestData(cls): cls.s1 = Secondary.objects.create(first="x1", second="y1") cls.p1 = Primary.objects.create(name="p1", value="xx", related=cls.s1) + cls.p2 = Primary.objects.create(name="p2", value="yy", related=cls.s1) def test_defer(self): qs = Primary.objects.all() @@ -141,7 +143,6 @@ class DeferTests(AssertionMixin, TestCase): def test_saving_object_with_deferred_field(self): # Saving models with deferred fields is possible (but inefficient, # since every field has to be retrieved first). 
- Primary.objects.create(name="p2", value="xy", related=self.s1) obj = Primary.objects.defer("value").get(name="p2") obj.name = "a new name" obj.save() @@ -181,10 +182,71 @@ class DeferTests(AssertionMixin, TestCase): self.assertEqual(obj.name, "adonis") def test_defer_fk_attname(self): - primary = Primary.objects.defer("related_id").get() + primary = Primary.objects.defer("related_id").get(name="p1") with self.assertNumQueries(1): self.assertEqual(primary.related_id, self.p1.related_id) + def test_only_fetch_mode_fetch_peers(self): + p1, p2 = Primary.objects.fetch_mode(FETCH_PEERS).only("name") + with self.assertNumQueries(1): + p1.value + with self.assertNumQueries(0): + p2.value + + def test_only_fetch_mode_fetch_peers_single(self): + p1 = Primary.objects.fetch_mode(FETCH_PEERS).only("name").get(name="p1") + with self.assertNumQueries(1): + p1.value + + def test_defer_fetch_mode_fetch_peers(self): + p1, p2 = Primary.objects.fetch_mode(FETCH_PEERS).defer("value") + with self.assertNumQueries(1): + p1.value + with self.assertNumQueries(0): + p2.value + + def test_defer_fetch_mode_fetch_peers_single(self): + p1 = Primary.objects.fetch_mode(FETCH_PEERS).defer("value").get(name="p1") + with self.assertNumQueries(1): + p1.value + + def test_only_fetch_mode_raise(self): + p1 = Primary.objects.fetch_mode(RAISE).only("name").get(name="p1") + msg = "Fetching of Primary.value blocked." + with self.assertRaisesMessage(FieldFetchBlocked, msg) as cm: + p1.value + self.assertIsNone(cm.exception.__cause__) + self.assertTrue(cm.exception.__suppress_context__) + + def test_defer_fetch_mode_raise(self): + p1 = Primary.objects.fetch_mode(RAISE).defer("value").get(name="p1") + msg = "Fetching of Primary.value blocked." 
+ with self.assertRaisesMessage(FieldFetchBlocked, msg) as cm: + p1.value + self.assertIsNone(cm.exception.__cause__) + self.assertTrue(cm.exception.__suppress_context__) + + +class DeferOtherDatabaseTests(TestCase): + databases = {"other"} + + @classmethod + def setUpTestData(cls): + cls.s1 = Secondary.objects.using("other").create(first="x1", second="y1") + cls.p1 = Primary.objects.using("other").create( + name="p1", value="xx", related=cls.s1 + ) + cls.p2 = Primary.objects.using("other").create( + name="p2", value="yy", related=cls.s1 + ) + + def test_defer_fetch_mode_fetch_peers(self): + p1, p2 = Primary.objects.using("other").fetch_mode(FETCH_PEERS).defer("value") + with self.assertNumQueries(1, using="other"): + p1.value + with self.assertNumQueries(0, using="other"): + p2.value + class BigChildDeferTests(AssertionMixin, TestCase): @classmethod diff --git a/tests/generic_relations/tests.py b/tests/generic_relations/tests.py index 1b53dbd8f4..3de243d7b8 100644 --- a/tests/generic_relations/tests.py +++ b/tests/generic_relations/tests.py @@ -1,7 +1,8 @@ from django.contrib.contenttypes.models import ContentType from django.contrib.contenttypes.prefetch import GenericPrefetch -from django.core.exceptions import FieldError +from django.core.exceptions import FieldError, FieldFetchBlocked from django.db.models import Q, prefetch_related_objects +from django.db.models.fetch_modes import FETCH_PEERS, RAISE from django.test import SimpleTestCase, TestCase, skipUnlessDBFeature from .models import ( @@ -780,6 +781,46 @@ class GenericRelationsTests(TestCase): self.platypus.latin_name, ) + def test_fetch_mode_fetch_peers(self): + TaggedItem.objects.bulk_create( + [ + TaggedItem(tag="lion", content_object=self.lion), + TaggedItem(tag="platypus", content_object=self.platypus), + TaggedItem(tag="quartz", content_object=self.quartz), + ] + ) + # Peers fetching should fetch all related peers GFKs at once which is + # one query per content type. 
+ with self.assertNumQueries(1): + quartz_tag, platypus_tag, lion_tag = TaggedItem.objects.fetch_mode( + FETCH_PEERS + ).order_by("-pk")[:3] + with self.assertNumQueries(2): + self.assertEqual(lion_tag.content_object, self.lion) + with self.assertNumQueries(0): + self.assertEqual(platypus_tag.content_object, self.platypus) + self.assertEqual(quartz_tag.content_object, self.quartz) + # It should ignore already cached instances though. + with self.assertNumQueries(1): + quartz_tag, platypus_tag, lion_tag = TaggedItem.objects.fetch_mode( + FETCH_PEERS + ).order_by("-pk")[:3] + with self.assertNumQueries(2): + self.assertEqual(quartz_tag.content_object, self.quartz) + self.assertEqual(lion_tag.content_object, self.lion) + with self.assertNumQueries(0): + self.assertEqual(platypus_tag.content_object, self.platypus) + self.assertEqual(quartz_tag.content_object, self.quartz) + + def test_fetch_mode_raise(self): + TaggedItem.objects.create(tag="lion", content_object=self.lion) + tag = TaggedItem.objects.fetch_mode(RAISE).get(tag="yellow") + msg = "Fetching of TaggedItem.content_object blocked." 
+ with self.assertRaisesMessage(FieldFetchBlocked, msg) as cm: + tag.content_object + self.assertIsNone(cm.exception.__cause__) + self.assertTrue(cm.exception.__suppress_context__) + class ProxyRelatedModelTest(TestCase): def test_default_behavior(self): diff --git a/tests/many_to_one/tests.py b/tests/many_to_one/tests.py index ac43c0da95..c5fa458570 100644 --- a/tests/many_to_one/tests.py +++ b/tests/many_to_one/tests.py @@ -1,8 +1,13 @@ import datetime from copy import deepcopy -from django.core.exceptions import FieldError, MultipleObjectsReturned +from django.core.exceptions import ( + FieldError, + FieldFetchBlocked, + MultipleObjectsReturned, +) from django.db import IntegrityError, models, transaction +from django.db.models import FETCH_PEERS, RAISE from django.test import TestCase from django.utils.translation import gettext_lazy @@ -916,3 +921,23 @@ class ManyToOneTests(TestCase): instances=countries, querysets=[City.objects.all(), City.objects.all()], ) + + def test_fetch_mode_fetch_peers_forward(self): + Article.objects.create( + headline="This is another test", + pub_date=datetime.date(2005, 7, 27), + reporter=self.r2, + ) + a1, a2 = Article.objects.fetch_mode(FETCH_PEERS) + with self.assertNumQueries(1): + a1.reporter + with self.assertNumQueries(0): + a2.reporter + + def test_fetch_mode_raise_forward(self): + a = Article.objects.fetch_mode(RAISE).get(pk=self.a.pk) + msg = "Fetching of Article.reporter blocked." 
+ with self.assertRaisesMessage(FieldFetchBlocked, msg) as cm: + a.reporter + self.assertIsNone(cm.exception.__cause__) + self.assertTrue(cm.exception.__suppress_context__) diff --git a/tests/one_to_one/tests.py b/tests/one_to_one/tests.py index d9bcb5d4dc..da7bd992c0 100644 --- a/tests/one_to_one/tests.py +++ b/tests/one_to_one/tests.py @@ -1,4 +1,6 @@ +from django.core.exceptions import FieldFetchBlocked from django.db import IntegrityError, connection, transaction +from django.db.models import FETCH_PEERS, RAISE from django.test import TestCase from .models import ( @@ -619,3 +621,39 @@ class OneToOneTests(TestCase): instances=places, querysets=[Bar.objects.all(), Bar.objects.all()], ) + + def test_fetch_mode_fetch_peers_forward(self): + Restaurant.objects.create( + place=self.p2, serves_hot_dogs=True, serves_pizza=False + ) + r1, r2 = Restaurant.objects.fetch_mode(FETCH_PEERS) + with self.assertNumQueries(1): + r1.place + with self.assertNumQueries(0): + r2.place + + def test_fetch_mode_fetch_peers_reverse(self): + Restaurant.objects.create( + place=self.p2, serves_hot_dogs=True, serves_pizza=False + ) + p1, p2 = Place.objects.fetch_mode(FETCH_PEERS) + with self.assertNumQueries(1): + p1.restaurant + with self.assertNumQueries(0): + p2.restaurant + + def test_fetch_mode_raise_forward(self): + r = Restaurant.objects.fetch_mode(RAISE).get(pk=self.r1.pk) + msg = "Fetching of Restaurant.place blocked." + with self.assertRaisesMessage(FieldFetchBlocked, msg) as cm: + r.place + self.assertIsNone(cm.exception.__cause__) + self.assertTrue(cm.exception.__suppress_context__) + + def test_fetch_mode_raise_reverse(self): + p = Place.objects.fetch_mode(RAISE).get(pk=self.p1.pk) + msg = "Fetching of Place.restaurant blocked." 
+ with self.assertRaisesMessage(FieldFetchBlocked, msg) as cm: + p.restaurant + self.assertIsNone(cm.exception.__cause__) + self.assertTrue(cm.exception.__suppress_context__) diff --git a/tests/prefetch_related/tests.py b/tests/prefetch_related/tests.py index 54b197ad83..6e4acdddf6 100644 --- a/tests/prefetch_related/tests.py +++ b/tests/prefetch_related/tests.py @@ -4,6 +4,7 @@ from django.contrib.contenttypes.models import ContentType from django.core.exceptions import ObjectDoesNotExist from django.db import NotSupportedError, connection from django.db.models import F, Prefetch, QuerySet, prefetch_related_objects +from django.db.models.fetch_modes import RAISE from django.db.models.query import get_prefetcher from django.db.models.sql import Query from django.test import ( @@ -107,6 +108,10 @@ class PrefetchRelatedTests(TestDataMixin, TestCase): normal_books = [a.first_book for a in Author.objects.all()] self.assertEqual(books, normal_books) + def test_fetch_mode_raise(self): + authors = list(Author.objects.fetch_mode(RAISE).prefetch_related("first_book")) + authors[0].first_book # No exception, already loaded + def test_foreignkey_reverse(self): with self.assertNumQueries(2): [ diff --git a/tests/raw_query/tests.py b/tests/raw_query/tests.py index 853b7ee20e..f66afbf28b 100644 --- a/tests/raw_query/tests.py +++ b/tests/raw_query/tests.py @@ -1,7 +1,8 @@ from datetime import date from decimal import Decimal -from django.core.exceptions import FieldDoesNotExist +from django.core.exceptions import FieldDoesNotExist, FieldFetchBlocked +from django.db.models import FETCH_PEERS, RAISE from django.db.models.query import RawQuerySet from django.test import TestCase, skipUnlessDBFeature @@ -158,6 +159,22 @@ class RawQueryTests(TestCase): books = Book.objects.all() self.assertSuccessfulRawQuery(Book, query, books) + def test_fk_fetch_mode_peers(self): + query = "SELECT * FROM raw_query_book" + books = list(Book.objects.fetch_mode(FETCH_PEERS).raw(query)) + with 
self.assertNumQueries(1): + books[0].author + books[1].author + + def test_fk_fetch_mode_raise(self): + query = "SELECT * FROM raw_query_book" + books = list(Book.objects.fetch_mode(RAISE).raw(query)) + msg = "Fetching of Book.author blocked." + with self.assertRaisesMessage(FieldFetchBlocked, msg) as cm: + books[0].author + self.assertIsNone(cm.exception.__cause__) + self.assertTrue(cm.exception.__suppress_context__) + def test_db_column_handler(self): """ Test of a simple raw query against a model containing a field with @@ -294,6 +311,23 @@ class RawQueryTests(TestCase): with self.assertRaisesMessage(FieldDoesNotExist, msg): list(Author.objects.raw(query)) + def test_missing_fields_fetch_mode_peers(self): + query = "SELECT id, first_name, dob FROM raw_query_author" + authors = list(Author.objects.fetch_mode(FETCH_PEERS).raw(query)) + with self.assertNumQueries(1): + authors[0].last_name + authors[1].last_name + + def test_missing_fields_fetch_mode_raise(self): + query = "SELECT id, first_name, dob FROM raw_query_author" + authors = list(Author.objects.fetch_mode(RAISE).raw(query)) + msg = "Fetching of Author.last_name blocked." + with self.assertRaisesMessage(FieldFetchBlocked, msg) as cm: + authors[0].last_name + self.assertIsNone(cm.exception.__cause__) + self.assertTrue(cm.exception.__suppress_context__) + self.assertTrue(cm.exception.__suppress_context__) + def test_annotations(self): query = ( "SELECT a.*, count(b.id) as book_count "