
Refs #36430, #36416, #34378 -- Simplified batch size calculation in QuerySet.in_bulk().

Jacob Walls 2025-06-03 21:53:10 -04:00 committed by Sarah Boyce
parent a2ce4900a6
commit d3cf24e9b4
3 changed files with 24 additions and 29 deletions
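
The gist of the change: QuerySet.in_bulk() stops deriving a batch size by hand from features.max_query_params and instead asks the backend via connection.ops.bulk_batch_size(). A minimal sketch of the resulting behavior, assuming Django is configured and a Comment model like the one in the tests (the import path and ids are illustrative placeholders, not from the patch):

    from django.db import connection

    from myapp.models import Comment  # hypothetical app; any model works

    ids = list(Comment.objects.values_list("pk", flat=True))

    # Ask the backend for a safe number of parameters per query. Passing the
    # pk field matters because a composite primary key consumes several
    # query parameters per id.
    batch_size = connection.ops.bulk_batch_size([Comment._meta.pk], ids)

    # in_bulk() applies this internally; spelled out, the batching is:
    if batch_size and batch_size < len(ids):
        result = {}
        for start in range(0, len(ids), batch_size):
            result.update(Comment.objects.in_bulk(ids[start : start + batch_size]))
    else:
        result = Comment.objects.in_bulk(ids)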

django/db/models/query.py

@@ -1187,10 +1187,8 @@ class QuerySet(AltersData):
         if not id_list:
             return {}
         filter_key = "{}__in".format(field_name)
-        max_params = connections[self.db].features.max_query_params or 0
-        num_fields = len(opts.pk_fields) if field_name == "pk" else 1
-        batch_size = max_params // num_fields
         id_list = tuple(id_list)
+        batch_size = connections[self.db].ops.bulk_batch_size([opts.pk], id_list)
         # If the database has a limit on the number of query parameters
         # (e.g. SQLite), retrieve objects in batches if necessary.
         if batch_size and batch_size < len(id_list):
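
For reference, the hook now being delegated to is DatabaseOperations.bulk_batch_size(). Sketched below is a paraphrase of the default implementation (not the verbatim Django source): effectively unlimited, so batching only kicks in on backends that override it.

    class BaseDatabaseOperations:
        def bulk_batch_size(self, fields, objs):
            # No parameter limit by default: a "batch" is simply everything.
            # Backends such as SQLite override this to derive a cap from
            # max_query_params and the number of fields per row.
            return len(objs)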

tests/composite_pk/tests.py

@@ -147,20 +147,24 @@ class CompositePKTests(TestCase):
         result = Comment.objects.in_bulk([self.comment.pk])
         self.assertEqual(result, {self.comment.pk: self.comment})
 
-    @unittest.mock.patch.object(
-        type(connection.features), "max_query_params", new_callable=lambda: 10
-    )
-    def test_in_bulk_batching(self, mocked_max_query_params):
+    def test_in_bulk_batching(self):
         Comment.objects.all().delete()
-        num_requiring_batching = (connection.features.max_query_params // 2) + 1
-        comments = [
-            Comment(id=i, tenant=self.tenant, user=self.user)
-            for i in range(1, num_requiring_batching + 1)
-        ]
-        Comment.objects.bulk_create(comments)
-        id_list = list(Comment.objects.values_list("pk", flat=True))
-        with self.assertNumQueries(2):
-            comment_dict = Comment.objects.in_bulk(id_list=id_list)
+        batching_required = connection.features.max_query_params is not None
+        expected_queries = 2 if batching_required else 1
+        with unittest.mock.patch.object(
+            type(connection.features), "max_query_params", 10
+        ):
+            num_requiring_batching = (
+                connection.ops.bulk_batch_size([Comment._meta.pk], []) + 1
+            )
+            comments = [
+                Comment(id=i, tenant=self.tenant, user=self.user)
+                for i in range(1, num_requiring_batching + 1)
+            ]
+            Comment.objects.bulk_create(comments)
+            id_list = list(Comment.objects.values_list("pk", flat=True))
+            with self.assertNumQueries(expected_queries):
+                comment_dict = Comment.objects.in_bulk(id_list=id_list)
         self.assertQuerySetEqual(comment_dict, id_list)
 
     def test_iterator(self):
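
A note on the rewritten test: expected_queries is derived from the unpatched max_query_params, which serves as a proxy for whether the backend's bulk_batch_size() consults a parameter limit at all. A backend that reports no limit keeps returning an unbounded batch size even with the attribute patched to 10, so it satisfies the lookup in a single query, while a limited backend such as SQLite splits it into two.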

tests/lookup/tests.py

@@ -248,28 +248,21 @@ class LookupTests(TestCase):
         with self.assertRaisesMessage(ValueError, msg):
             Article.objects.in_bulk([self.au1], field_name="author")
 
-    @skipUnlessDBFeature("can_distinct_on_fields")
     def test_in_bulk_preserve_ordering(self):
-        articles = (
-            Article.objects.order_by("author_id", "-pub_date")
-            .distinct("author_id")
-            .in_bulk([self.au1.id, self.au2.id], field_name="author_id")
-        )
         self.assertEqual(
-            articles,
-            {self.au1.id: self.a4, self.au2.id: self.a5},
+            list(Article.objects.in_bulk([self.au2.id, self.au1.id])),
+            [self.au2.id, self.au1.id],
         )
 
-    @skipUnlessDBFeature("can_distinct_on_fields")
     def test_in_bulk_preserve_ordering_with_batch_size(self):
-        qs = Article.objects.order_by("author_id", "-pub_date").distinct("author_id")
+        qs = Article.objects.all()
         with (
-            mock.patch.object(connection.features.__class__, "max_query_params", 1),
+            mock.patch.object(connection.ops, "bulk_batch_size", return_value=2),
             self.assertNumQueries(2),
         ):
             self.assertEqual(
-                qs.in_bulk([self.au1.id, self.au2.id], field_name="author_id"),
-                {self.au1.id: self.a4, self.au2.id: self.a5},
+                list(qs.in_bulk([self.a4.id, self.a3.id, self.a2.id, self.a1.id])),
+                [self.a4.id, self.a3.id, self.a2.id, self.a1.id],
            )
 
     @skipUnlessDBFeature("can_distinct_on_fields")
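
The rewritten tests no longer build their querysets with .distinct("author_id"), so the can_distinct_on_fields skips on these two tests could be dropped and the ordering checks now run on every backend. Patching connection.ops.bulk_batch_size directly, rather than max_query_params, also targets exactly the hook that in_bulk() calls after this change. The remaining decorator belongs to the next test in the file.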