1
0
mirror of https://github.com/django/django.git synced 2025-07-05 18:29:11 +00:00

queryset-refactor: More whack-a-mole optimisation work. Still got a couple of big spots to go, though.

git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7253 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Malcolm Tredinnick 2008-03-16 16:18:39 +00:00
parent 50a1880100
commit 067d380e98
6 changed files with 67 additions and 84 deletions

View File

@ -160,11 +160,10 @@ class Field(object):
# mapped to one of the built-in Django field types. In this case, you # mapped to one of the built-in Django field types. In this case, you
# can implement db_type() instead of get_internal_type() to specify # can implement db_type() instead of get_internal_type() to specify
# exactly which wacky database column type you want to use. # exactly which wacky database column type you want to use.
data_types = get_creation_module().DATA_TYPES try:
internal_type = self.get_internal_type() return get_creation_module().DATA_TYPES[self.get_internal_type()] % self.__dict__
if internal_type not in data_types: except KeyError:
return None return None
return data_types[internal_type] % self.__dict__
def validate_full(self, field_data, all_data): def validate_full(self, field_data, all_data):
""" """

View File

@ -123,6 +123,7 @@ class Options(object):
self.setup_pk(field) self.setup_pk(field)
if hasattr(self, '_field_cache'): if hasattr(self, '_field_cache'):
del self._field_cache del self._field_cache
del self._field_name_cache
if hasattr(self, '_name_map'): if hasattr(self, '_name_map'):
del self._name_map del self._name_map
@ -155,17 +156,20 @@ class Options(object):
""" """
The getter for self.fields. This returns the list of field objects The getter for self.fields. This returns the list of field objects
available to this model (including through parent models). available to this model (including through parent models).
Callers are not permitted to modify this list, since it's a reference
to this instance's cached field list (not a copy).
""" """
try: try:
self._field_cache self._field_name_cache
except AttributeError: except AttributeError:
self._fill_fields_cache() self._fill_fields_cache()
return self._field_cache.keys() return self._field_name_cache
fields = property(_fields) fields = property(_fields)
def get_fields_with_model(self): def get_fields_with_model(self):
""" """
Returns a list of (field, model) pairs for all fields. The "model" Returns a sequence of (field, model) pairs for all fields. The "model"
element is None for fields on the current model. Mostly of use when element is None for fields on the current model. Mostly of use when
constructing queries so that we know which model a field belongs to. constructing queries so that we know which model a field belongs to.
""" """
@ -173,19 +177,19 @@ class Options(object):
self._field_cache self._field_cache
except AttributeError: except AttributeError:
self._fill_fields_cache() self._fill_fields_cache()
return self._field_cache.items() return self._field_cache
def _fill_fields_cache(self): def _fill_fields_cache(self):
cache = SortedDict() cache = []
for parent in self.parents: for parent in self.parents:
for field, model in parent._meta.get_fields_with_model(): for field, model in parent._meta.get_fields_with_model():
if model: if model:
cache[field] = model cache.append((field, model))
else: else:
cache[field] = parent cache.append((field, parent))
for field in self.local_fields: cache.extend([(f, None) for f in self.local_fields])
cache[field] = None self._field_cache = tuple(cache)
self._field_cache = cache self._field_name_cache = [x for x, _ in cache]
def _many_to_many(self): def _many_to_many(self):
try: try:

View File

@ -31,10 +31,9 @@ class _QuerySet(object):
return repr(list(self)) return repr(list(self))
def __len__(self): def __len__(self):
# Since __len__ is called quite frequently (as part of list(qs), which # Since __len__ is called quite frequently (for example, as part of
# means as part of qs.get(), for example), we make some effort here to # list(qs)), we make some effort here to be as efficient as possible
# be as efficient as possible whilst not messing up any existing # whilst not messing up any existing iterators against the queryset.
# iterators against the queryset.
if self._result_cache is None: if self._result_cache is None:
if self._iter: if self._iter:
self._result_cache = list(self._iter()) self._result_cache = list(self._iter())
@ -50,6 +49,8 @@ class _QuerySet(object):
self._result_cache = [] self._result_cache = []
if self._iter: if self._iter:
return self._result_iter() return self._result_iter()
# Python's list iterator is better than our version when we're just
# iterating over the cache.
return iter(self._result_cache) return iter(self._result_cache)
def _result_iter(self): def _result_iter(self):

View File

@ -7,6 +7,7 @@ databases). The abstraction barrier only works one way: this module has to know
all about the internals of models in order to get the information it needs. all about the internals of models in order to get the information it needs.
""" """
import itertools
from copy import deepcopy from copy import deepcopy
from django.utils.tree import Node from django.utils.tree import Node
@ -223,13 +224,13 @@ class Query(object):
if where: if where:
result.append('WHERE %s' % where) result.append('WHERE %s' % where)
params.extend(w_params)
if self.extra_where: if self.extra_where:
if not where: if not where:
result.append('WHERE') result.append('WHERE')
else: else:
result.append('AND') result.append('AND')
result.append(' AND'.join(self.extra_where)) result.append(' AND'.join(self.extra_where))
params.extend(w_params)
if self.group_by: if self.group_by:
grouping = self.get_grouping() grouping = self.get_grouping()
@ -361,39 +362,40 @@ class Query(object):
if hasattr(col, 'alias'): if hasattr(col, 'alias'):
aliases.append(col.alias) aliases.append(col.alias)
elif self.default_cols: elif self.default_cols:
result = self.get_default_columns(lambda x, y: "%s.%s" % (qn(x), qn(y))) result = self.get_default_columns(True)
aliases = result[:] aliases = result[:]
result.extend(['(%s) AS %s' % (col, alias) result.extend(['(%s) AS %s' % (col, alias)
for alias, col in self.extra_select.items()]) for alias, col in self.extra_select.items()])
aliases.extend(self.extra_select.keys()) aliases.extend(self.extra_select.keys())
self._select_aliases = dict.fromkeys(aliases) self._select_aliases = set(aliases)
return result return result
def get_default_columns(self, combine_func=None): def get_default_columns(self, as_str=False):
""" """
Computes the default columns for selecting every field in the base Computes the default columns for selecting every field in the base
model. Returns a list of default (alias, column) pairs suitable for model. Returns a list of default (alias, column) pairs suitable for
direct inclusion as the select columns. The 'combine_func' can be inclusion as the select columns. If 'as_str' is True, returns a list of
passed in to change the returned data set to a list of some other strings, quoted appropriately for use in SQL directly.
structure.
""" """
# Note: We allow 'combine_func' here because this method is called a
# lot. The extra overhead from returning a list and then transforming
# it in get_columns() hurt performance in a measurable way.
result = [] result = []
table_alias = self.tables[0] table_alias = self.tables[0]
root_pk = self.model._meta.pk.column root_pk = self.model._meta.pk.column
seen = {None: table_alias} seen = {None: table_alias}
qn = self.quote_name_unless_alias
qn2 = self.connection.ops.quote_name
for field, model in self.model._meta.get_fields_with_model(): for field, model in self.model._meta.get_fields_with_model():
if model not in seen: try:
seen[model] = self.join((table_alias, model._meta.db_table, alias = seen[model]
except KeyError:
alias = self.join((table_alias, model._meta.db_table,
root_pk, model._meta.pk.column)) root_pk, model._meta.pk.column))
if combine_func: seen[model] = alias
result.append(combine_func(seen[model], field.column)) if as_str:
result.append('%s.%s' % (qn(alias), qn2(field.column)))
else: else:
result.append((seen[model], field.column)) result.append((alias, field.column))
return result return result
def get_from_clause(self): def get_from_clause(self):
@ -897,7 +899,7 @@ class Query(object):
# from any previous joins (ref count is 1 in the table list), we # from any previous joins (ref count is 1 in the table list), we
# make the new additions (and any existing ones not used in the new # make the new additions (and any existing ones not used in the new
# join list) an outer join. # join list) an outer join.
join_it = nested_iter(join_list) join_it = itertools.chain(*join_list)
table_it = iter(self.tables) table_it = iter(self.tables)
join_it.next(), table_it.next() join_it.next(), table_it.next()
for join in join_it: for join in join_it:
@ -1325,21 +1327,19 @@ class Query(object):
raise EmptyResultSet raise EmptyResultSet
except EmptyResultSet: except EmptyResultSet:
if result_type == MULTI: if result_type == MULTI:
raise StopIteration return empty_iter()
else: else:
return return
cursor = self.connection.cursor() cursor = self.connection.cursor()
cursor.execute(sql, params) cursor.execute(sql, params)
if result_type is None: if not result_type:
return cursor return cursor
if result_type == SINGLE: if result_type == SINGLE:
return cursor.fetchone() return cursor.fetchone()
# The MULTI case. # The MULTI case.
return results_iter(cursor) return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)), [])
def get_order_dir(field, default='ASC'): def get_order_dir(field, default='ASC'):
""" """
@ -1354,24 +1354,11 @@ def get_order_dir(field, default='ASC'):
return field[1:], dirn[1] return field[1:], dirn[1]
return field, dirn[0] return field, dirn[0]
def results_iter(cursor): def empty_iter():
""" """
An iterator over the result set that returns a chunk of rows at a time. Returns an iterator containing no results.
""" """
while 1: yield iter([]).next()
rows = cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)
if not rows:
raise StopIteration
yield rows
def nested_iter(nested):
"""
An iterator over a sequence of sequences. Each element is returned in turn.
Only handles one level of nesting, since that's all we need here.
"""
for seq in nested:
for elt in seq:
yield elt
def setup_join_cache(sender): def setup_join_cache(sender):
""" """

View File

@ -238,40 +238,29 @@ class UpdateQuery(Query):
class InsertQuery(Query): class InsertQuery(Query):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(InsertQuery, self).__init__(*args, **kwargs) super(InsertQuery, self).__init__(*args, **kwargs)
self._setup_query()
def _setup_query(self):
"""
Run on initialisation and after cloning.
"""
self.columns = [] self.columns = []
self.values = [] self.values = []
self.params = ()
def clone(self, klass=None, **kwargs):
extras = {'columns': self.columns[:], 'values': self.values[:],
'params': self.params}
return super(InsertQuery, self).clone(klass, extras)
def as_sql(self): def as_sql(self):
self.select_related = False # We don't need quote_name_unless_alias() here, since these are all
self.pre_sql_setup() # going to be column names (so we can avoid the extra overhead).
qn = self.quote_name_unless_alias qn = self.connection.ops.quote_name
result = ['INSERT INTO %s' % qn(self.tables[0])] result = ['INSERT INTO %s' % qn(self.model._meta.db_table)]
result.append('(%s)' % ', '.join([qn(c) for c in self.columns])) result.append('(%s)' % ', '.join([qn(c) for c in self.columns]))
result.append('VALUES (') result.append('VALUES (%s)' % ', '.join(self.values))
params = [] return ' '.join(result), self.params
first = True
for value in self.values:
prefix = not first and ', ' or ''
if isinstance(value, RawValue):
result.append('%s%s' % (prefix, value.value))
else:
result.append('%s%%s' % prefix)
params.append(value)
first = False
result.append(')')
return ' '.join(result), tuple(params)
def execute_sql(self, return_id=False): def execute_sql(self, return_id=False):
cursor = super(InsertQuery, self).execute_sql(None) cursor = super(InsertQuery, self).execute_sql(None)
if return_id: if return_id:
return self.connection.ops.last_insert_id(cursor, self.tables[0], return self.connection.ops.last_insert_id(cursor,
self.model._meta.pk.column) self.model._meta.db_table, self.model._meta.pk.column)
def insert_values(self, insert_values, raw_values=False): def insert_values(self, insert_values, raw_values=False):
""" """
@ -285,16 +274,18 @@ class InsertQuery(Query):
""" """
func = lambda x: self.model._meta.get_field_by_name(x)[0].column func = lambda x: self.model._meta.get_field_by_name(x)[0].column
# keys() and values() return items in the same order, providing the # keys() and values() return items in the same order, providing the
# dictionary hasn't changed between calls. So these lines work as # dictionary hasn't changed between calls. So the dual iteration here
# intended. # works as intended.
for name in insert_values: for name in insert_values:
if name == 'pk': if name == 'pk':
name = self.model._meta.pk.name name = self.model._meta.pk.name
self.columns.append(func(name)) self.columns.append(func(name))
if raw_values: if raw_values:
self.values.extend([RawValue(v) for v in insert_values.values()])
else:
self.values.extend(insert_values.values()) self.values.extend(insert_values.values())
else:
values = insert_values.values()
self.params += tuple(values)
self.values.extend(['%s'] * len(values))
class DateQuery(Query): class DateQuery(Query):
""" """

View File

@ -263,5 +263,6 @@ True
<Chef: Albert the chef> <Chef: Albert the chef>
>>> len(db.connection.queries) >>> len(db.connection.queries)
3 3
>>> settings.DEBUG = False
"""} """}