From 93baa3e4170ea40be967d48560ffca55395be07b Mon Sep 17 00:00:00 2001 From: Malcolm Tredinnick Date: Sat, 15 Mar 2008 09:13:22 +0000 Subject: [PATCH] queryset-refactor: Optimised some internal data structures in sql/query.py. Mostly this involves changing them to a "copy on write" implementation, which speeds up cloning (and the relevant structures aren't updated very frequently). git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7247 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/db/models/query.py | 11 +- django/db/models/sql/constants.py | 6 +- django/db/models/sql/query.py | 174 ++++++++++++++---------- django/db/models/sql/subqueries.py | 2 +- django/db/models/sql/where.py | 6 +- tests/regressiontests/queries/models.py | 6 +- 6 files changed, 112 insertions(+), 93 deletions(-) diff --git a/django/db/models/query.py b/django/db/models/query.py index e13aed393e..3d2e6cea1e 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -421,16 +421,7 @@ class _QuerySet(object): assert self.query.can_filter(), \ "Cannot change a query once a slice has been taken" clone = self._clone() - if select: - clone.query.extra_select.update(select) - if where: - clone.query.extra_where.extend(where) - if params: - clone.query.extra_params.extend(params) - if tables: - clone.query.extra_tables.extend(tables) - if order_by: - clone.query.extra_order_by = order_by + clone.query.add_extra(select, where, params, tables, order_by) return clone def reverse(self): diff --git a/django/db/models/sql/constants.py b/django/db/models/sql/constants.py index 26404089e5..3075817385 100644 --- a/django/db/models/sql/constants.py +++ b/django/db/models/sql/constants.py @@ -22,11 +22,7 @@ JOIN_TYPE = 2 LHS_ALIAS = 3 LHS_JOIN_COL = 4 RHS_JOIN_COL = 5 -# Alias map lists -ALIAS_TABLE = 0 -ALIAS_REFCOUNT = 1 -ALIAS_JOIN = 2 -ALIAS_NULLABLE=3 +NULLABLE = 6 # How many results to expect from a cursor.execute call MULTI = 'multi' diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index e582630bcc..8635951106 100644 --- a/django/db/models/sql/query.py +++ b/django/db/models/sql/query.py @@ -7,7 +7,7 @@ databases). The abstraction barrier only works one way: this module has to know all about the internals of models in order to get the information it needs. """ -import copy +from copy import deepcopy from django.utils.tree import Node from django.utils.datastructures import SortedDict @@ -42,7 +42,8 @@ class Query(object): def __init__(self, model, connection, where=WhereNode): self.model = model self.connection = connection - self.alias_map = {} # Maps alias to table name + self.alias_refcount = {} + self.alias_map = {} # Maps alias to join information self.table_map = {} # Maps table names to list of aliases. self.rev_join_map = {} # Reverse of join_map. (FIXME: Update comment) self.quote_cache = {} @@ -69,11 +70,11 @@ class Query(object): # These are for extensions. The contents are more or less appended # verbatim to the appropriate clause. - self.extra_select = SortedDict() # Maps col_alias -> col_sql. - self.extra_tables = [] - self.extra_where = [] - self.extra_params = [] - self.extra_order_by = [] + self.extra_select = {} # Maps col_alias -> col_sql. + self.extra_tables = () + self.extra_where = () + self.extra_params = () + self.extra_order_by = () def __str__(self): """ @@ -125,7 +126,8 @@ class Query(object): obj.__class__ = klass or self.__class__ obj.model = self.model obj.connection = self.connection - obj.alias_map = copy.deepcopy(self.alias_map) + obj.alias_refcount = self.alias_refcount.copy() + obj.alias_map = self.alias_map.copy() obj.table_map = self.table_map.copy() obj.rev_join_map = self.rev_join_map.copy() obj.quote_cache = {} @@ -135,7 +137,7 @@ class Query(object): obj.start_meta = self.start_meta obj.select = self.select[:] obj.tables = self.tables[:] - obj.where = copy.deepcopy(self.where) + obj.where = deepcopy(self.where) obj.where_class = self.where_class obj.group_by = self.group_by[:] obj.having = self.having[:] @@ -145,10 +147,10 @@ class Query(object): obj.select_related = self.select_related obj.max_depth = self.max_depth obj.extra_select = self.extra_select.copy() - obj.extra_tables = self.extra_tables[:] - obj.extra_where = self.extra_where[:] - obj.extra_params = self.extra_params[:] - obj.extra_order_by = self.extra_order_by[:] + obj.extra_tables = self.extra_tables + obj.extra_where = self.extra_where + obj.extra_params = self.extra_params + obj.extra_order_by = self.extra_order_by obj.__dict__.update(kwargs) if hasattr(obj, '_setup_query'): obj._setup_query() @@ -179,7 +181,7 @@ class Query(object): obj = self.clone(CountQuery, _query=obj, where=self.where_class(), distinct=False) obj.select = [] - obj.extra_select = SortedDict() + obj.extra_select = {} obj.add_count_column() data = obj.execute_sql(SINGLE) if not data: @@ -272,11 +274,10 @@ class Query(object): conjunction = (connector == AND) first = True for alias in rhs.tables: - if not rhs.alias_map[alias][ALIAS_REFCOUNT]: + if not rhs.alias_refcount[alias]: # An unused alias. continue - promote = (rhs.alias_map[alias][ALIAS_JOIN][JOIN_TYPE] == - self.LOUTER) + promote = (rhs.alias_map[alias][JOIN_TYPE] == self.LOUTER) new_alias = self.join(rhs.rev_join_map[alias], (conjunction and not first), used, promote, not conjunction) used[new_alias] = None @@ -288,14 +289,14 @@ class Query(object): # to an outer join. if not conjunction: for alias in self.tables[1:]: - if self.alias_map[alias][ALIAS_REFCOUNT] == 1: - self.alias_map[alias][ALIAS_JOIN][JOIN_TYPE] = self.LOUTER + if self.alias_refcount[alias] == 1: + self.promote_alias(alias, True) break # Now relabel a copy of the rhs where-clause and add it to the current # one. if rhs.where: - w = copy.deepcopy(rhs.where) + w = deepcopy(rhs.where) w.relabel_aliases(change_map) if not self.where: # Since 'self' matches everything, add an explicit "include @@ -316,19 +317,18 @@ class Query(object): if isinstance(col, (list, tuple)): self.select.append((change_map.get(col[0], col[0]), col[1])) else: - item = copy.deepcopy(col) + item = deepcopy(col) item.relabel_aliases(change_map) self.select.append(item) self.extra_select = rhs.extra_select.copy() - self.extra_tables = rhs.extra_tables[:] - self.extra_where = rhs.extra_where[:] - self.extra_params = rhs.extra_params[:] + self.extra_tables = rhs.extra_tables + self.extra_where = rhs.extra_where + self.extra_params = rhs.extra_params # Ordering uses the 'rhs' ordering, unless it has none, in which case # the current ordering is used. self.order_by = rhs.order_by and rhs.order_by[:] or self.order_by - self.extra_order_by = (rhs.extra_order_by and rhs.extra_order_by[:] or - self.extra_order_by) + self.extra_order_by = rhs.extra_order_by or self.extra_order_by def pre_sql_setup(self): """ @@ -411,11 +411,11 @@ class Query(object): qn = self.quote_name_unless_alias first = True for alias in self.tables: - if not self.alias_map[alias][ALIAS_REFCOUNT]: + if not self.alias_refcount[alias]: continue - join = self.alias_map[alias][ALIAS_JOIN] + join = self.alias_map[alias] if join: - name, alias, join_type, lhs, lhs_col, col = join + name, alias, join_type, lhs, lhs_col, col, nullable = join alias_str = (alias != name and ' AS %s' % alias or '') else: join_type = None @@ -429,7 +429,6 @@ class Query(object): connector = not first and ', ' or '' result.append('%s%s%s' % (connector, qn(name), alias_str)) first = False - extra_tables = [] for t in self.extra_tables: alias, created = self.table_alias(t) if created: @@ -554,7 +553,7 @@ class Query(object): # We have to do the same "final join" optimisation as in # add_filter, since the final column might not otherwise be part of # the select set (so we can't order on it). - join = self.alias_map[alias][ALIAS_JOIN] + join = self.alias_map[alias] if col == join[RHS_JOIN_COL]: self.unref_alias(alias) alias = join[LHS_ALIAS] @@ -571,7 +570,7 @@ class Query(object): """ if not create and table_name in self.table_map: alias = self.table_map[table_name][-1] - self.alias_map[alias][ALIAS_REFCOUNT] += 1 + self.alias_refcount[alias] += 1 return alias, False # Create a new alias for this table. @@ -580,26 +579,32 @@ class Query(object): alias = table_name else: alias = '%s%d' % (self.alias_prefix, len(self.alias_map) + 1) - self.alias_map[alias] = [table_name, 1, None, False] + self.alias_refcount[alias] = 1 + self.alias_map[alias] = None self.table_map.setdefault(table_name, []).append(alias) self.tables.append(alias) return alias, True def ref_alias(self, alias): """ Increases the reference count for this alias. """ - self.alias_map[alias][ALIAS_REFCOUNT] += 1 + self.alias_refcount[alias] += 1 def unref_alias(self, alias): """ Decreases the reference count for this alias. """ - self.alias_map[alias][ALIAS_REFCOUNT] -= 1 + self.alias_refcount[alias] -= 1 - def promote_alias(self, alias): + def promote_alias(self, alias, unconditional=False): """ Promotes the join type of an alias to an outer join if it's possible - for the join to contain NULL values on the left. + for the join to contain NULL values on the left. If 'unconditional' is + False, the join is only promoted if it is nullable, otherwise it is + always promoted. """ - if self.alias_map[alias][ALIAS_NULLABLE]: - self.alias_map[alias][ALIAS_JOIN][JOIN_TYPE] = self.LOUTER + if ((unconditional or self.alias_map[alias][NULLABLE]) and + self.alias_map[alias] != self.LOUTER): + data = list(self.alias_map[alias]) + data[JOIN_TYPE] = self.LOUTER + self.alias_map[alias] = tuple(data) def change_aliases(self, change_map): """ @@ -619,27 +624,34 @@ class Query(object): # 2. Rename the alias in the internal table/alias datastructures. for old_alias, new_alias in change_map.items(): - alias_data = self.alias_map[old_alias] - alias_data[ALIAS_JOIN][RHS_ALIAS] = new_alias + alias_data = list(self.alias_map[old_alias]) + alias_data[RHS_ALIAS] = new_alias + self.rev_join_map[new_alias] = self.rev_join_map[old_alias] del self.rev_join_map[old_alias] - table_aliases = self.table_map[alias_data[ALIAS_TABLE]] + self.alias_refcount[new_alias] = self.alias_refcount[old_alias] + del self.alias_refcount[old_alias] + self.alias_map[new_alias] = tuple(alias_data) + del self.alias_map[old_alias] + + table_aliases = self.table_map[alias_data[TABLE_NAME]] for pos, alias in enumerate(table_aliases): if alias == old_alias: table_aliases[pos] = new_alias break - self.alias_map[new_alias] = alias_data - del self.alias_map[old_alias] for pos, alias in enumerate(self.tables): if alias == old_alias: self.tables[pos] = new_alias break # 3. Update any joins that refer to the old alias. - for data in self.alias_map.values(): - alias = data[ALIAS_JOIN][LHS_ALIAS] - if alias in change_map: - data[ALIAS_JOIN][LHS_ALIAS] = change_map[alias] + for alias, data in self.alias_map.items(): + lhs = data[LHS_ALIAS] + if lhs in change_map: + data = list(data) + data[LHS_ALIAS] = change_map[lhs] + self.alias_map[alias] = tuple(data) + def bump_prefix(self): """ @@ -678,7 +690,7 @@ class Query(object): Returns the number of tables in this query with a non-zero reference count. """ - return len([1 for o in self.alias_map.values() if o[ALIAS_REFCOUNT]]) + return len([1 for count in self.alias_refcount.values() if count]) def join(self, connection, always_create=False, exclusions=(), promote=False, outer_if_first=False, nullable=False): @@ -717,34 +729,33 @@ class Query(object): lhs_table = lhs is_table = True else: - lhs_table = self.alias_map[lhs][ALIAS_TABLE] + lhs_table = self.alias_map[lhs][TABLE_NAME] is_table = False t_ident = (lhs_table, table, lhs_col, col) if not always_create: for alias, row in self.rev_join_map.items(): if t_ident == row and alias not in exclusions: self.ref_alias(alias) - if promote and self.alias_map[alias][ALIAS_NULLABLE]: - self.alias_map[alias][ALIAS_JOIN][JOIN_TYPE] = self.LOUTER + if promote: + self.promote_alias(alias) return alias - # If we get to here (no non-excluded alias exists), we'll fall - # through to creating a new alias. + # If we get to here (no non-excluded alias exists), we'll fall + # through to creating a new alias. # No reuse is possible, so we need a new alias. assert not is_table, \ "Must pass in lhs alias when creating a new join." alias, _ = self.table_alias(table, True) - if promote or outer_if_first: - join_type = self.LOUTER - else: - join_type = self.INNER - join = [table, alias, join_type, lhs, lhs_col, col] if not lhs: # Not all tables need to be joined to anything. No join type # means the later columns are ignored. - join[JOIN_TYPE] = None - self.alias_map[alias][ALIAS_JOIN] = join - self.alias_map[alias][ALIAS_NULLABLE] = nullable + join_type = None + elif promote or outer_if_first: + join_type = self.LOUTER + else: + join_type = self.INNER + join = (table, alias, join_type, lhs, lhs_col, col, nullable) + self.alias_map[alias] = join self.rev_join_map[alias] = t_ident return alias @@ -850,7 +861,7 @@ class Query(object): if trim and len(join_list) > 1: extra = join_list[-1] join_list = join_list[:-1] - col = self.alias_map[extra[0]][ALIAS_JOIN][LHS_JOIN_COL] + col = self.alias_map[extra[0]][LHS_JOIN_COL] for alias in extra: self.unref_alias(alias) else: @@ -862,7 +873,7 @@ class Query(object): # we are comparing against, we can go back one step in the join # chain and compare against the lhs of the join instead. The result # (potentially) involves one less table join. - join = self.alias_map[alias][ALIAS_JOIN] + join = self.alias_map[alias] if col == join[RHS_JOIN_COL]: self.unref_alias(alias) alias = join[LHS_ALIAS] @@ -891,7 +902,7 @@ class Query(object): join_it.next(), table_it.next() for join in join_it: table = table_it.next() - if join == table and self.alias_map[join][ALIAS_REFCOUNT] > 1: + if join == table and self.alias_refcount[join] > 1: continue self.promote_alias(join) if table != join: @@ -904,7 +915,7 @@ class Query(object): # that's harmless. self.promote_alias(table) - entry = [alias, col, field, lookup_type, value] + entry = (alias, col, field, lookup_type, value) if merge_negated: # This case is when we're doing the Q2 filter in exclude(Q1, Q2). # It's different from exclude(Q1).exclude(Q2). @@ -926,8 +937,8 @@ class Query(object): if not merged: self.where.negate() if count > 1 and lookup_type != 'isnull': - j_col = self.alias_map[alias][ALIAS_JOIN][RHS_JOIN_COL] - entry = Node([[alias, j_col, None, 'isnull', True]]) + j_col = self.alias_map[alias][RHS_JOIN_COL] + entry = Node([(alias, j_col, None, 'isnull', True)]) entry.negate() self.where.add(entry, AND) @@ -1198,7 +1209,7 @@ class Query(object): no ordering in the resulting query (not even the model's default). """ self.order_by = [] - self.extra_order_by = [] + self.extra_order_by = () if force_empty: self.default_ordering = False @@ -1232,7 +1243,7 @@ class Query(object): # level. self.distinct = False self.select = [select] - self.extra_select = SortedDict() + self.extra_select = {} def add_select_related(self, fields): """ @@ -1247,6 +1258,27 @@ class Query(object): d = d.setdefault(part, {}) self.select_related = field_dict + def add_extra(self, select, where, params, tables, order_by): + """ + Adds data to the various extra_* attributes for user-created additiosn + to the query. + """ + if select: + # The extra select might be ordered (because it will be accepting + # parameters). + if (isinstance(select, SortedDict) and + not isinstance(self.extra_select, SortedDict)): + self.extra_select = SortedDict(self.extra_select) + self.extra_select.update(select) + if where: + self.extra_where += tuple(where) + if params: + self.extra_params += tuple(params) + if tables: + self.extra_tables += tuple(tables) + if order_by: + self.extra_order_by = order_by + def set_start(self, start): """ Sets the table from which to start joining. The start position is @@ -1263,7 +1295,7 @@ class Query(object): field, col, opts, joins = self.setup_joins(start.split(LOOKUP_SEP), opts, alias, False) alias = joins[-1][0] - self.select = [(alias, self.alias_map[alias][ALIAS_JOIN][RHS_JOIN_COL])] + self.select = [(alias, self.alias_map[alias][RHS_JOIN_COL])] self.start_meta = opts # The call to setup_joins add an extra reference to everything in diff --git a/django/db/models/sql/subqueries.py b/django/db/models/sql/subqueries.py index 2594b9fcce..e4e439f8c6 100644 --- a/django/db/models/sql/subqueries.py +++ b/django/db/models/sql/subqueries.py @@ -175,7 +175,7 @@ class UpdateQuery(Query): else: self.add_filter(('pk__in', query)) for alias in self.tables[1:]: - self.alias_map[alias][ALIAS_REFCOUNT] = 0 + self.alias_refcount[alias] = 0 def clear_related(self, related_field, pk_list): """ diff --git a/django/db/models/sql/where.py b/django/db/models/sql/where.py index f0d2786a49..43e8d83658 100644 --- a/django/db/models/sql/where.py +++ b/django/db/models/sql/where.py @@ -151,14 +151,14 @@ class WhereNode(tree.Node): """ if not node: node = self - for child in node.children: + for pos, child in enumerate(node.children): if hasattr(child, 'relabel_aliases'): child.relabel_aliases(change_map) elif isinstance(child, tree.Node): self.relabel_aliases(change_map, child) else: - val = child[0] - child[0] = change_map.get(val, val) + if child[0] in change_map: + node.children[pos] = (change_map[child[0]],) + child[1:] class EverythingNode(object): """ diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py index 7185558f45..a9783569a7 100644 --- a/tests/regressiontests/queries/models.py +++ b/tests/regressiontests/queries/models.py @@ -205,7 +205,7 @@ Bug #1801 Bug #2306 Checking that no join types are "left outer" joins. >>> query = Item.objects.filter(tags=t2).query ->>> query.LOUTER not in [x[2][2] for x in query.alias_map.values()] +>>> query.LOUTER not in [x[2] for x in query.alias_map.values()] True >>> Item.objects.filter(Q(tags=t1)).order_by('name') @@ -332,12 +332,12 @@ Bug #5324, #6704 # Excluding from a relation that cannot be NULL should not use outer joins. >>> query = Item.objects.exclude(creator__in=[a1, a2]).query ->>> query.LOUTER not in [x[2][2] for x in query.alias_map.values()] +>>> query.LOUTER not in [x[2] for x in query.alias_map.values()] True Similarly, when one of the joins cannot possibly, ever, involve NULL values (Author -> ExtraInfo, in the following), it should never be promoted to a left outer join. So hte following query should only involve one "left outer" join (Author -> Item is 0-to-many). >>> qs = Author.objects.filter(id=a1.id).filter(Q(extra__note=n1)|Q(item__note=n3)) ->>> len([x[2][2] for x in qs.query.alias_map.values() if x[2][2] == query.LOUTER]) +>>> len([x[2] for x in qs.query.alias_map.values() if x[2] == query.LOUTER]) 1 The previous changes shouldn't affect nullable foreign key joins.