From ecaba3602837d1e02fe1e961f7d3bf9086453259 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anssi=20K=C3=A4=C3=A4ri=C3=A4inen?= Date: Tue, 24 Sep 2013 19:37:55 +0300 Subject: [PATCH] Improved Query join promotion logic There were multiple cases where join promotion was a bit too aggressive. This resulted in using outer joins where not necessary. Refs #21150. --- django/db/models/sql/query.py | 116 ++++++++++++++++++++-------- tests/aggregation_regress/models.py | 2 +- tests/aggregation_regress/tests.py | 23 +++++- tests/queries/tests.py | 47 +++++++---- 4 files changed, 136 insertions(+), 52 deletions(-) diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index 9eea55e98e..ffd5af6c15 100644 --- a/django/db/models/sql/query.py +++ b/django/db/models/sql/query.py @@ -901,15 +901,15 @@ class Query(object): # Not all tables need to be joined to anything. No join type # means the later columns are ignored. join_type = None - elif outer_if_first or self.alias_map[lhs].join_type == self.LOUTER: - # We need to use LOUTER join if asked by outer_if_first or if the - # LHS table is left-joined in the query. + elif self.alias_map[lhs].join_type == self.LOUTER: join_type = self.LOUTER else: join_type = self.INNER join = JoinInfo(table, alias, join_type, lhs, join_cols or ((None, None),), nullable, join_field) self.alias_map[alias] = join + if outer_if_first: + self.promote_joins([alias]) if connection in self.join_map: self.join_map[connection] += (alias,) else: @@ -1004,17 +1004,14 @@ class Query(object): # - this is an annotation over a model field # then we need to explore the joins that are required. + # Join promotion note - we must not remove any rows here, so use + # outer join if there isn't any existing join. field, sources, opts, join_list, path = self.setup_joins( - field_list, opts, self.get_initial_alias()) + field_list, opts, self.get_initial_alias(), outer_if_first=True) # Process the join chain to see if it can be trimmed targets, _, join_list = self.trim_joins(sources, join_list, path) - # If the aggregate references a model or field that requires a join, - # those joins must be LEFT OUTER - empty join rows must be returned - # in order for zeros to be returned for those aggregates. - self.promote_joins(join_list) - col = targets[0].column source = sources[0] col = (join_list[-1], col) @@ -1089,8 +1086,69 @@ class Query(object): break return lookup_type, lookup_parts + def promote_filter_joins(self, join_list, can_reuse, lookup_type, value, + current_negated, connector): + # If the comparison is against NULL, we may need to use some left + # outer joins when creating the join chain. + # + # The logic here is that every isnull lookup in non-negated case is + # promoted when the connector is OR. In the AND case we do this only + # for first creation of the join. Join demotion happens reverse to + # this - demote always in AND case, first use only in OR case. + # + # In the OR case, a null row for the join can yield results for isnull + # lookup. But in the AND case that can't happen (assuming the other + # joins require non-null values) - if the join produces null row, then + # the ANDed condition that requires non-null value will not match, and + # hence the whole condition will not match. + # + # Consider case: (a__something & a__isnull=True) + # + # If a is null here, then a__something can't match anything (unless + # it also requires outer join), and thus the join doesn't need to be + # promoted by a__isnull. + # + # The connector isn't the only condition for removing join promotion. + # The already created joins also play a role here. Above, in the + # AND case, we don't want to promote the isnull lookup. But if we have + # only (a__isnull), then we must promote it. To see if a join needs + # to be promoted we use the seen joins inside this filter clause. That + # is contained in can_reuse - those are actually joins that have been + # built by this filter clause. + # + # Similar reasoning applies for join demotion, exception we demote + # joins in the AND case always, and never demote them in the OR case. + # + # Some examples: (a__name__isnull=True | a__type=1) + # When a__name__isnull is seen it is promoted (it is first creation of + # that join). a__type will not demote the join as it isn't first + # "a" join in the filter condition, and this is ORed query. + # (a__name__isnull=True & a__type=1) + # Here again a__name__isnull will create an outer join, but now a__type + # will demote the join back to inner join as the connector is AND. + # (a__type=1 & a__name__isnull=True) + # a__type will create inner join, a__name__isnull will not promote it + # to outer join as this is AND case and this isn't first use of the + # join. For completeness: + # (a__type=1 | a__name__isnull=True) + # The join for a__type is created as inner join. The join is promoted + # by a__name__isnull (ORed query => always promote isnull=True joins) + + if lookup_type == 'isnull' and value is True and not current_negated: + promotable_joins = join_list if connector == OR else () + if connector == AND and can_reuse is not None: + promotable_joins = (j for j in join_list if j not in can_reuse) + self.promote_joins(promotable_joins) + else: + demotable_joins = () if connector == OR else set(join_list) + if connector == OR and can_reuse is not None: + demotable_joins = set(j for j in join_list if j not in can_reuse) + for j in join_list: + if self.alias_map[j].join_type == self.LOUTER and j in demotable_joins: + self.alias_map[j] = self.alias_map[j]._replace(join_type=self.INNER) + def build_filter(self, filter_expr, branch_negated=False, current_negated=False, - can_reuse=None): + can_reuse=None, connector=AND): """ Builds a WhereNode for a single filter clause, but doesn't add it to this Query. Query.add_q() will then add this filter to the where @@ -1139,18 +1197,14 @@ class Query(object): try: field, sources, opts, join_list, path = self.setup_joins( parts, opts, alias, can_reuse, allow_many,) - if can_reuse is not None: - can_reuse.update(join_list) except MultiJoin as e: return self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]), can_reuse, e.names_with_path) - if (lookup_type == 'isnull' and value is True and not current_negated and - len(join_list) > 1): - # If the comparison is against NULL, we may need to use some left - # outer joins when creating the join chain. This is only done when - # needed, as it's less efficient at the database level. - self.promote_joins(join_list) + self.promote_filter_joins(join_list, can_reuse, lookup_type, value, + current_negated, connector) + if can_reuse is not None: + can_reuse.update(join_list) # Process the join list to see if we can remove any inner joins from # the far end (fewer tables in a query is better). Note that join @@ -1182,7 +1236,7 @@ class Query(object): return clause def add_filter(self, filter_clause): - self.where.add(self.build_filter(filter_clause), 'AND') + self.where.add(self.build_filter(filter_clause, can_reuse=self.used_aliases), 'AND') def need_having(self, obj): """ @@ -1237,14 +1291,11 @@ class Query(object): else: where_part, having_parts = self.split_having_parts( q_object.clone(), q_object.negated) - used_aliases = self.used_aliases - clause = self._add_q(where_part, used_aliases) + clause = self._add_q(where_part, self.used_aliases) self.where.add(clause, AND) for hp in having_parts: - clause = self._add_q(hp, used_aliases) + clause = self._add_q(hp, self.used_aliases) self.having.add(clause, AND) - if self.filter_is_sticky: - self.used_aliases = used_aliases def _add_q(self, q_object, used_aliases, branch_negated=False, current_negated=False): @@ -1272,7 +1323,7 @@ class Query(object): else: child_clause = self.build_filter( child, can_reuse=used_aliases, branch_negated=branch_negated, - current_negated=current_negated) + current_negated=current_negated, connector=connector) target_clause.add(child_clause, connector) if connector == OR: used = alias_diff(refcounts_before, self.alias_refcount) @@ -1445,7 +1496,7 @@ class Query(object): """ # Generate the inner query. query = Query(self.model) - query.where.add(query.build_filter(filter_expr), AND) + query.add_filter(filter_expr) query.clear_ordering(True) # Try to have as simple as possible subquery -> trim leading joins from # the subquery. @@ -1553,15 +1604,12 @@ class Query(object): try: for name in field_names: + # Join promotion note - we must not remove any rows here, so + # if there is no existing joins, use outer join. field, targets, u2, joins, path = self.setup_joins( - name.split(LOOKUP_SEP), opts, alias, None, allow_m2m, - True) - - # Trim last join if possible - targets, final_alias, remaining_joins = self.trim_joins(targets, joins[-2:], path) - joins = joins[:-2] + remaining_joins - - self.promote_joins(joins[1:]) + name.split(LOOKUP_SEP), opts, alias, can_reuse=None, + allow_many=allow_m2m, outer_if_first=True) + targets, final_alias, joins = self.trim_joins(targets, joins, path) for target in targets: self.select.append(SelectInfo((final_alias, target.column), target)) except MultiJoin: diff --git a/tests/aggregation_regress/models.py b/tests/aggregation_regress/models.py index 24859f4ce4..a2dc060640 100644 --- a/tests/aggregation_regress/models.py +++ b/tests/aggregation_regress/models.py @@ -90,7 +90,7 @@ class HardbackBook(Book): # Models for ticket #21150 class Alfa(models.Model): - pass + name = models.CharField(max_length=10, null=True) class Bravo(models.Model): pass diff --git a/tests/aggregation_regress/tests.py b/tests/aggregation_regress/tests.py index 82be5afab2..9fcaefe6f5 100644 --- a/tests/aggregation_regress/tests.py +++ b/tests/aggregation_regress/tests.py @@ -12,9 +12,8 @@ from django.test import TestCase, skipUnlessDBFeature from django.test.utils import Approximate from django.utils import six -from .models import ( - Author, Book, Publisher, Clues, Entries, HardbackBook, ItemTag, - WithManualPK, Alfa, Bravo, Charlie) +from .models import (Author, Book, Publisher, Clues, Entries, HardbackBook, + ItemTag, WithManualPK, Alfa, Bravo, Charlie) class AggregationTests(TestCase): @@ -1137,6 +1136,8 @@ class AggregationTests(TestCase): 'select__avg': Approximate(1.666, places=2), }) + +class JoinPromotionTests(TestCase): def test_ticket_21150(self): b = Bravo.objects.create() c = Charlie.objects.create(bravo=b) @@ -1152,3 +1153,19 @@ class AggregationTests(TestCase): self.assertQuerysetEqual( qs, [c], lambda x: x) self.assertEqual(qs[0].alfa, a) + + def test_existing_join_not_promoted(self): + # No promotion for existing joins + qs = Charlie.objects.filter(alfa__name__isnull=False).annotate(Count('alfa__name')) + self.assertTrue(' INNER JOIN ' in str(qs.query)) + # Also, the existing join is unpromoted when doing filtering for already + # promoted join. + qs = Charlie.objects.annotate(Count('alfa__name')).filter(alfa__name__isnull=False) + self.assertTrue(' INNER JOIN ' in str(qs.query)) + # But, as the join is nullable first use by annotate will be LOUTER + qs = Charlie.objects.annotate(Count('alfa__name')) + self.assertTrue(' LEFT OUTER JOIN ' in str(qs.query)) + + def test_non_nullable_fk_not_promoted(self): + qs = Book.objects.annotate(Count('contact__name')) + self.assertTrue(' INNER JOIN ' in str(qs.query)) diff --git a/tests/queries/tests.py b/tests/queries/tests.py index 7bb7a150a2..a7b0af55a3 100644 --- a/tests/queries/tests.py +++ b/tests/queries/tests.py @@ -2689,6 +2689,15 @@ class NullJoinPromotionOrTest(TestCase): self.assertEqual(str(qs.query).count('INNER JOIN'), 1) self.assertEqual(list(qs), [self.a2]) + def test_null_join_demotion(self): + qs = ModelA.objects.filter(Q(b__name__isnull=False) & Q(b__name__isnull=True)) + self.assertTrue(' INNER JOIN ' in str(qs.query)) + qs = ModelA.objects.filter(Q(b__name__isnull=True) & Q(b__name__isnull=False)) + self.assertTrue(' INNER JOIN ' in str(qs.query)) + qs = ModelA.objects.filter(Q(b__name__isnull=False) | Q(b__name__isnull=True)) + self.assertTrue(' LEFT OUTER JOIN ' in str(qs.query)) + qs = ModelA.objects.filter(Q(b__name__isnull=True) | Q(b__name__isnull=False)) + self.assertTrue(' LEFT OUTER JOIN ' in str(qs.query)) class ReverseJoinTrimmingTest(TestCase): def test_reverse_trimming(self): @@ -2785,22 +2794,19 @@ class DisjunctionPromotionTests(TestCase): self.assertEqual(str(qs.query).count('INNER JOIN'), 1) self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1) - @unittest.expectedFailure - def test_disjunction_promotion3_failing(self): - # Now the ORed filter creates LOUTER join, but we do not have - # logic to unpromote it for the AND filter after it. The query - # results will be correct, but we have one LOUTER JOIN too much - # currently. + def test_disjunction_promotion3_demote(self): + # This one needs demotion logic: the first filter causes a to be + # outer joined, the second filter makes it inner join again. qs = BaseA.objects.filter( Q(a__f1='foo') | Q(b__f2='foo')).filter(a__f2='bar') self.assertEqual(str(qs.query).count('INNER JOIN'), 1) self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1) - @unittest.expectedFailure - def test_disjunction_promotion4_failing(self): - # Failure because no join repromotion + def test_disjunction_promotion4_demote(self): qs = BaseA.objects.filter(Q(a=1) | Q(a=2)) self.assertEqual(str(qs.query).count('JOIN'), 0) + # Demote needed for the "a" join. It is marked as outer join by + # above filter (even if it is trimmed away). qs = qs.filter(a__f1='foo') self.assertEqual(str(qs.query).count('INNER JOIN'), 1) @@ -2810,9 +2816,8 @@ class DisjunctionPromotionTests(TestCase): qs = qs.filter(Q(a=1) | Q(a=2)) self.assertEqual(str(qs.query).count('INNER JOIN'), 1) - @unittest.expectedFailure - def test_disjunction_promotion5_failing(self): - # Failure because no join repromotion logic. + def test_disjunction_promotion5_demote(self): + # Failure because no join demotion logic for this case. qs = BaseA.objects.filter(Q(a=1) | Q(a=2)) # Note that the above filters on a force the join to an # inner join even if it is trimmed. @@ -2823,8 +2828,8 @@ class DisjunctionPromotionTests(TestCase): self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1) qs = BaseA.objects.filter(Q(a__f1='foo') | Q(b__f1='foo')) # Now the join to a is created as LOUTER - self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 0) - qs = qs.objects.filter(Q(a=1) | Q(a=2)) + self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 2) + qs = qs.filter(Q(a=1) | Q(a=2)) self.assertEqual(str(qs.query).count('INNER JOIN'), 1) self.assertEqual(str(qs.query).count('LEFT OUTER JOIN'), 1) @@ -3079,3 +3084,17 @@ class Ticket21203Tests(TestCase): qs = Ticket21203Child.objects.select_related('parent').defer('parent__created') self.assertQuerysetEqual(qs, [c], lambda x: x) self.assertIs(qs[0].parent.parent_bool, True) + +class ValuesJoinPromotionTests(TestCase): + def test_values_no_promotion_for_existing(self): + qs = Node.objects.filter(parent__parent__isnull=False) + self.assertTrue(' INNER JOIN ' in str(qs.query)) + qs = qs.values('parent__parent__id') + self.assertTrue(' INNER JOIN ' in str(qs.query)) + # Make sure there is a left outer join without the filter. + qs = Node.objects.values('parent__parent__id') + self.assertTrue(' LEFT OUTER JOIN ' in str(qs.query)) + + def test_non_nullable_fk_not_promoted(self): + qs = ObjectB.objects.values('objecta__name') + self.assertTrue(' INNER JOIN ' in str(qs.query))