1
0
mirror of https://github.com/django/django.git synced 2025-10-29 00:26:07 +00:00

Fixed #24020 -- Refactored SQL compiler to use expressions

Refactored compiler SELECT, GROUP BY and ORDER BY generation.
While there, also refactored select_related() implementation
(get_cached_row() and get_klass_info() are now gone!).

Made get_db_converters() method work on expressions instead of
internal_type. This allows the backend converters to target
specific expressions if need be.

Added query.context, this can be used to set per-query state.

Also changed the signature of database converters. They now accept
context as an argument.
This commit is contained in:
Anssi Kääriäinen
2014-12-01 09:28:01 +02:00
committed by Tim Graham
parent b8abfe141b
commit 0c7633178f
41 changed files with 970 additions and 1416 deletions

View File

@@ -13,8 +13,7 @@ from django.db import (connections, router, transaction, IntegrityError,
DJANGO_VERSION_PICKLE_KEY)
from django.db.models.constants import LOOKUP_SEP
from django.db.models.fields import AutoField, Empty
from django.db.models.query_utils import (Q, select_related_descend,
deferred_class_factory, InvalidQuery)
from django.db.models.query_utils import Q, deferred_class_factory, InvalidQuery
from django.db.models.deletion import Collector
from django.db.models.sql.constants import CURSOR
from django.db.models import sql
@@ -233,76 +232,34 @@ class QuerySet(object):
An iterator over the results from applying this QuerySet to the
database.
"""
fill_cache = False
if connections[self.db].features.supports_select_related:
fill_cache = self.query.select_related
if isinstance(fill_cache, dict):
requested = fill_cache
else:
requested = None
max_depth = self.query.max_depth
extra_select = list(self.query.extra_select)
annotation_select = list(self.query.annotation_select)
only_load = self.query.get_loaded_field_names()
fields = self.model._meta.concrete_fields
load_fields = []
# If only/defer clauses have been specified,
# build the list of fields that are to be loaded.
if only_load:
for field in self.model._meta.concrete_fields:
model = field.model._meta.model
try:
if field.name in only_load[model]:
# Add a field that has been explicitly included
load_fields.append(field.name)
except KeyError:
# Model wasn't explicitly listed in the only_load table
# Therefore, we need to load all fields from this model
load_fields.append(field.name)
skip = None
if load_fields:
# Some fields have been deferred, so we have to initialize
# via keyword arguments.
skip = set()
init_list = []
for field in fields:
if field.name not in load_fields:
skip.add(field.attname)
else:
init_list.append(field.attname)
model_cls = deferred_class_factory(self.model, skip)
else:
model_cls = self.model
init_list = [f.attname for f in fields]
# Cache db and model outside the loop
db = self.db
compiler = self.query.get_compiler(using=db)
index_start = len(extra_select)
annotation_start = index_start + len(init_list)
if fill_cache:
klass_info = get_klass_info(model_cls, max_depth=max_depth,
requested=requested, only_load=only_load)
for row in compiler.results_iter():
if fill_cache:
obj, _ = get_cached_row(row, index_start, db, klass_info,
offset=len(annotation_select))
else:
obj = model_cls.from_db(db, init_list, row[index_start:annotation_start])
if extra_select:
for i, k in enumerate(extra_select):
setattr(obj, k, row[i])
# Add the annotations to the model
if annotation_select:
for i, annotation in enumerate(annotation_select):
setattr(obj, annotation, row[i + annotation_start])
# Execute the query. This will also fill compiler.select, klass_info,
# and annotations.
results = compiler.execute_sql()
select, klass_info, annotation_col_map = (compiler.select, compiler.klass_info,
compiler.annotation_col_map)
if klass_info is None:
return
model_cls = klass_info['model']
select_fields = klass_info['select_fields']
model_fields_start, model_fields_end = select_fields[0], select_fields[-1] + 1
init_list = [f[0].output_field.attname
for f in select[model_fields_start:model_fields_end]]
if len(init_list) != len(model_cls._meta.concrete_fields):
init_set = set(init_list)
skip = [f.attname for f in model_cls._meta.concrete_fields
if f.attname not in init_set]
model_cls = deferred_class_factory(model_cls, skip)
related_populators = get_related_populators(klass_info, select, db)
for row in compiler.results_iter(results):
obj = model_cls.from_db(db, init_list, row[model_fields_start:model_fields_end])
if related_populators:
for rel_populator in related_populators:
rel_populator.populate(row, obj)
if annotation_col_map:
for attr_name, col_pos in annotation_col_map.items():
setattr(obj, attr_name, row[col_pos])
# Add the known related objects to the model, if there are any
if self._known_related_objects:
@@ -1032,11 +989,8 @@ class QuerySet(object):
"""
Prepare the query for computing a result that contains aggregate annotations.
"""
opts = self.model._meta
if self.query.group_by is None:
field_names = [f.attname for f in opts.concrete_fields]
self.query.add_fields(field_names, False)
self.query.set_group_by()
self.query.group_by = True
def _prepare(self):
return self
@@ -1135,9 +1089,11 @@ class ValuesQuerySet(QuerySet):
Called by the _clone() method after initializing the rest of the
instance.
"""
if self.query.group_by is True:
self.query.add_fields([f.attname for f in self.model._meta.concrete_fields], False)
self.query.set_group_by()
self.query.clear_deferred_loading()
self.query.clear_select_fields()
if self._fields:
self.extra_names = []
self.annotation_names = []
@@ -1246,11 +1202,12 @@ class ValuesQuerySet(QuerySet):
class ValuesListQuerySet(ValuesQuerySet):
def iterator(self):
compiler = self.query.get_compiler(self.db)
if self.flat and len(self._fields) == 1:
for row in self.query.get_compiler(self.db).results_iter():
for row in compiler.results_iter():
yield row[0]
elif not self.query.extra_select and not self.query.annotation_select:
for row in self.query.get_compiler(self.db).results_iter():
for row in compiler.results_iter():
yield tuple(row)
else:
# When extra(select=...) or an annotation is involved, the extra
@@ -1269,7 +1226,7 @@ class ValuesListQuerySet(ValuesQuerySet):
else:
fields = names
for row in self.query.get_compiler(self.db).results_iter():
for row in compiler.results_iter():
data = dict(zip(names, row))
yield tuple(data[f] for f in fields)
@@ -1281,244 +1238,6 @@ class ValuesListQuerySet(ValuesQuerySet):
return clone
def get_klass_info(klass, max_depth=0, cur_depth=0, requested=None,
only_load=None, from_parent=None):
"""
Helper function that recursively returns an information for a klass, to be
used in get_cached_row. It exists just to compute this information only
once for entire queryset. Otherwise it would be computed for each row, which
leads to poor performance on large querysets.
Arguments:
* klass - the class to retrieve (and instantiate)
* max_depth - the maximum depth to which a select_related()
relationship should be explored.
* cur_depth - the current depth in the select_related() tree.
Used in recursive calls to determine if we should dig deeper.
* requested - A dictionary describing the select_related() tree
that is to be retrieved. keys are field names; values are
dictionaries describing the keys on that related object that
are themselves to be select_related().
* only_load - if the query has had only() or defer() applied,
this is the list of field names that will be returned. If None,
the full field list for `klass` can be assumed.
* from_parent - the parent model used to get to this model
Note that when travelling from parent to child, we will only load child
fields which aren't in the parent.
"""
if max_depth and requested is None and cur_depth > max_depth:
# We've recursed deeply enough; stop now.
return None
if only_load:
load_fields = only_load.get(klass) or set()
# When we create the object, we will also be creating populating
# all the parent classes, so traverse the parent classes looking
# for fields that must be included on load.
for parent in klass._meta.get_parent_list():
fields = only_load.get(parent)
if fields:
load_fields.update(fields)
else:
load_fields = None
if load_fields:
# Handle deferred fields.
skip = set()
init_list = []
# Build the list of fields that *haven't* been requested
for field in klass._meta.concrete_fields:
model = field.model._meta.concrete_model
if from_parent and model and issubclass(from_parent, model):
# Avoid loading fields already loaded for parent model for
# child models.
continue
elif field.name not in load_fields:
skip.add(field.attname)
else:
init_list.append(field.attname)
# Retrieve all the requested fields
field_count = len(init_list)
if skip:
klass = deferred_class_factory(klass, skip)
field_names = init_list
else:
field_names = ()
else:
# Load all fields on klass
field_count = len(klass._meta.concrete_fields)
# Check if we need to skip some parent fields.
if from_parent and len(klass._meta.local_concrete_fields) != len(klass._meta.concrete_fields):
# Only load those fields which haven't been already loaded into
# 'from_parent'.
non_seen_models = [p for p in klass._meta.get_parent_list()
if not issubclass(from_parent, p)]
# Load local fields, too...
non_seen_models.append(klass)
field_names = [f.attname for f in klass._meta.concrete_fields
if f.model in non_seen_models]
field_count = len(field_names)
# Try to avoid populating field_names variable for performance reasons.
# If field_names variable is set, we use **kwargs based model init
# which is slower than normal init.
if field_count == len(klass._meta.concrete_fields):
field_names = ()
restricted = requested is not None
related_fields = []
for f in klass._meta.fields:
if select_related_descend(f, restricted, requested, load_fields):
if restricted:
next = requested[f.name]
else:
next = None
klass_info = get_klass_info(f.rel.to, max_depth=max_depth, cur_depth=cur_depth + 1,
requested=next, only_load=only_load)
related_fields.append((f, klass_info))
reverse_related_fields = []
if restricted:
for o in klass._meta.related_objects:
if o.field.unique and select_related_descend(o.field, restricted, requested,
only_load.get(o.related_model), reverse=True):
next = requested[o.field.related_query_name()]
parent = klass if issubclass(o.related_model, klass) else None
klass_info = get_klass_info(o.related_model, max_depth=max_depth, cur_depth=cur_depth + 1,
requested=next, only_load=only_load, from_parent=parent)
reverse_related_fields.append((o.field, klass_info))
if field_names:
pk_idx = field_names.index(klass._meta.pk.attname)
else:
meta = klass._meta
pk_idx = meta.concrete_fields.index(meta.pk)
return klass, field_names, field_count, related_fields, reverse_related_fields, pk_idx
def reorder_for_init(model, field_names, values):
"""
Reorders given field names and values for those fields
to be in the same order as model.__init__() expects to find them.
"""
new_names, new_values = [], []
for f in model._meta.concrete_fields:
if f.attname not in field_names:
continue
new_names.append(f.attname)
new_values.append(values[field_names.index(f.attname)])
assert len(new_names) == len(field_names)
return new_names, new_values
def get_cached_row(row, index_start, using, klass_info, offset=0,
parent_data=()):
"""
Helper function that recursively returns an object with the specified
related attributes already populated.
This method may be called recursively to populate deep select_related()
clauses.
Arguments:
* row - the row of data returned by the database cursor
* index_start - the index of the row at which data for this
object is known to start
* offset - the number of additional fields that are known to
exist in row for `klass`. This usually means the number of
annotated results on `klass`.
* using - the database alias on which the query is being executed.
* klass_info - result of the get_klass_info function
* parent_data - parent model data in format (field, value). Used
to populate the non-local fields of child models.
"""
if klass_info is None:
return None
klass, field_names, field_count, related_fields, reverse_related_fields, pk_idx = klass_info
fields = row[index_start:index_start + field_count]
# If the pk column is None (or the equivalent '' in the case the
# connection interprets empty strings as nulls), then the related
# object must be non-existent - set the relation to None.
if (fields[pk_idx] is None or
(connections[using].features.interprets_empty_strings_as_nulls and
fields[pk_idx] == '')):
obj = None
elif field_names:
values = list(fields)
parent_values = []
parent_field_names = []
for rel_field, value in parent_data:
parent_field_names.append(rel_field.attname)
parent_values.append(value)
field_names, values = reorder_for_init(
klass, parent_field_names + field_names,
parent_values + values)
obj = klass.from_db(using, field_names, values)
else:
field_names = [f.attname for f in klass._meta.concrete_fields]
obj = klass.from_db(using, field_names, fields)
# Instantiate related fields
index_end = index_start + field_count + offset
# Iterate over each related object, populating any
# select_related() fields
for f, klass_info in related_fields:
# Recursively retrieve the data for the related object
cached_row = get_cached_row(row, index_end, using, klass_info)
# If the recursive descent found an object, populate the
# descriptor caches relevant to the object
if cached_row:
rel_obj, index_end = cached_row
if obj is not None:
# If the base object exists, populate the
# descriptor cache
setattr(obj, f.get_cache_name(), rel_obj)
if f.unique and rel_obj is not None:
# If the field is unique, populate the
# reverse descriptor cache on the related object
setattr(rel_obj, f.rel.get_cache_name(), obj)
# Now do the same, but for reverse related objects.
# Only handle the restricted case - i.e., don't do a depth
# descent into reverse relations unless explicitly requested
for f, klass_info in reverse_related_fields:
# Transfer data from this object to childs.
parent_data = []
for rel_field in klass_info[0]._meta.fields:
rel_model = rel_field.model._meta.concrete_model
if rel_model == klass_info[0]._meta.model:
rel_model = None
if rel_model is not None and isinstance(obj, rel_model):
parent_data.append((rel_field, getattr(obj, rel_field.attname)))
# Recursively retrieve the data for the related object
cached_row = get_cached_row(row, index_end, using, klass_info,
parent_data=parent_data)
# If the recursive descent found an object, populate the
# descriptor caches relevant to the object
if cached_row:
rel_obj, index_end = cached_row
if obj is not None:
# populate the reverse descriptor cache
setattr(obj, f.rel.get_cache_name(), rel_obj)
if rel_obj is not None:
# If the related object exists, populate
# the descriptor cache.
setattr(rel_obj, f.get_cache_name(), obj)
# Populate related object caches using parent data.
for rel_field, _ in parent_data:
if rel_field.rel:
setattr(rel_obj, rel_field.attname, getattr(obj, rel_field.attname))
try:
cached_obj = getattr(obj, rel_field.get_cache_name())
setattr(rel_obj, rel_field.get_cache_name(), cached_obj)
except AttributeError:
# Related object hasn't been cached yet
pass
return obj, index_end
class RawQuerySet(object):
"""
Provides an iterator which converts the results of raw SQL queries into
@@ -1569,7 +1288,9 @@ class RawQuerySet(object):
else:
model_cls = self.model
fields = [self.model_fields.get(c, None) for c in self.columns]
converters = compiler.get_converters(fields)
converters = compiler.get_converters([
f.get_col(f.model._meta.db_table) if f else None for f in fields
])
for values in query:
if converters:
values = compiler.apply_converters(values, converters)
@@ -1920,3 +1641,120 @@ def prefetch_one_level(instances, prefetcher, lookup, level):
qs._prefetch_done = True
obj._prefetched_objects_cache[cache_name] = qs
return all_related_objects, additional_lookups
class RelatedPopulator(object):
"""
RelatedPopulator is used for select_related() object instantiation.
The idea is that each select_related() model will be populated by a
different RelatedPopulator instance. The RelatedPopulator instances get
klass_info and select (computed in SQLCompiler) plus the used db as
input for initialization. That data is used to compute which columns
to use, how to instantiate the model, and how to populate the links
between the objects.
The actual creation of the objects is done in populate() method. This
method gets row and from_obj as input and populates the select_related()
model instance.
"""
def __init__(self, klass_info, select, db):
self.db = db
# Pre-compute needed attributes. The attributes are:
# - model_cls: the possibly deferred model class to instantiate
# - either:
# - cols_start, cols_end: usually the columns in the row are
# in the same order model_cls.__init__ expects them, so we
# can instantiate by model_cls(*row[cols_start:cols_end])
# - reorder_for_init: When select_related descends to a child
# class, then we want to reuse the already selected parent
# data. However, in this case the parent data isn't necessarily
# in the same order that Model.__init__ expects it to be, so
# we have to reorder the parent data. The reorder_for_init
# attribute contains a function used to reorder the field data
# in the order __init__ expects it.
# - pk_idx: the index of the primary key field in the reordered
# model data. Used to check if a related object exists at all.
# - init_list: the field attnames fetched from the database. For
# deferred models this isn't the same as all attnames of the
# model's fields.
# - related_populators: a list of RelatedPopulator instances if
# select_related() descends to related models from this model.
# - cache_name, reverse_cache_name: the names to use for setattr
# when assigning the fetched object to the from_obj. If the
# reverse_cache_name is set, then we also set the reverse link.
select_fields = klass_info['select_fields']
from_parent = klass_info['from_parent']
if not from_parent:
self.cols_start = select_fields[0]
self.cols_end = select_fields[-1] + 1
self.init_list = [
f[0].output_field.attname for f in select[self.cols_start:self.cols_end]
]
self.reorder_for_init = None
else:
model_init_attnames = [
f.attname for f in klass_info['model']._meta.concrete_fields
]
reorder_map = []
for idx in select_fields:
field = select[idx][0].output_field
init_pos = model_init_attnames.index(field.attname)
reorder_map.append((init_pos, field.attname, idx))
reorder_map.sort()
self.init_list = [v[1] for v in reorder_map]
pos_list = [row_pos for _, _, row_pos in reorder_map]
def reorder_for_init(row):
return [row[row_pos] for row_pos in pos_list]
self.reorder_for_init = reorder_for_init
self.model_cls = self.get_deferred_cls(klass_info, self.init_list)
self.pk_idx = self.init_list.index(self.model_cls._meta.pk.attname)
self.related_populators = get_related_populators(klass_info, select, self.db)
field = klass_info['field']
reverse = klass_info['reverse']
self.reverse_cache_name = None
if reverse:
self.cache_name = field.rel.get_cache_name()
self.reverse_cache_name = field.get_cache_name()
else:
self.cache_name = field.get_cache_name()
if field.unique:
self.reverse_cache_name = field.rel.get_cache_name()
def get_deferred_cls(self, klass_info, init_list):
model_cls = klass_info['model']
if len(init_list) != len(model_cls._meta.concrete_fields):
init_set = set(init_list)
skip = [
f.attname for f in model_cls._meta.concrete_fields
if f.attname not in init_set
]
model_cls = deferred_class_factory(model_cls, skip)
return model_cls
def populate(self, row, from_obj):
if self.reorder_for_init:
obj_data = self.reorder_for_init(row)
else:
obj_data = row[self.cols_start:self.cols_end]
if obj_data[self.pk_idx] is None:
obj = None
else:
obj = self.model_cls.from_db(self.db, self.init_list, obj_data)
if obj and self.related_populators:
for rel_iter in self.related_populators:
rel_iter.populate(row, obj)
setattr(from_obj, self.cache_name, obj)
if obj and self.reverse_cache_name:
setattr(obj, self.reverse_cache_name, from_obj)
def get_related_populators(klass_info, select, db):
iterators = []
related_klass_infos = klass_info.get('related_klass_infos', [])
for rel_klass_info in related_klass_infos:
rel_cls = RelatedPopulator(rel_klass_info, select, db)
iterators.append(rel_cls)
return iterators