From 02eeb8dd1495c2cc0505d4c667ae9fa216f5d92e Mon Sep 17 00:00:00 2001 From: Luke Plant Date: Sun, 29 Jan 2006 00:22:10 +0000 Subject: [PATCH] magic-removal: first stab at implementing Manager as a QuerySet git-svn-id: http://code.djangoproject.com/svn/django/branches/magic-removal@2150 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/db/models/fields/related.py | 4 +- django/db/models/manager.py | 96 ++++++++- django/db/models/query.py | 300 ++++++++++++++++++++++++++--- 3 files changed, 372 insertions(+), 28 deletions(-) diff --git a/django/db/models/fields/related.py b/django/db/models/fields/related.py index 248a6d8760..2174e19fee 100644 --- a/django/db/models/fields/related.py +++ b/django/db/models/fields/related.py @@ -114,9 +114,9 @@ class ManyRelatedObjectsDescriptor(object): rel_field = self.related.field if self.rel_type == 'o2m': - manager.core_filters = {'%s__%s__exact' % (rel_field.name, rel_field.rel.to._meta.pk.name): getattr(instance, rel_field.rel.get_related_field().attname)} + manager._set_core_filter({'%s__%s__exact' % (rel_field.name, rel_field.rel.to._meta.pk.name): getattr(instance, rel_field.rel.get_related_field().attname)}) else: - manager.core_filters = {'%s__%s__exact' % (rel_field.name, instance_type._meta.pk.name): instance._get_pk_val()} + manager._set_core_filter({'%s__%s__exact' % (rel_field.name, instance_type._meta.pk.name): instance._get_pk_val()}) # Prepare the manager. # TODO: Fix this hack? 
diff --git a/django/db/models/manager.py b/django/db/models/manager.py index 07f6a28172..303d333aa7 100644 --- a/django/db/models/manager.py +++ b/django/db/models/manager.py @@ -1,11 +1,13 @@ from django.db.models.fields import DateField from django.utils.functional import curry from django.db import backend, connection -from django.db.models.query import Q, parse_lookup, fill_table_cache, get_cached_row +from django.db.models.query import QuerySet +from django.db.models.query import Q, fill_table_cache, get_cached_row # TODO - remove lots of these from django.db.models.query import handle_legacy_orderlist, orderlist2sql, orderfield2column from django.dispatch import dispatcher from django.db.models import signals from django.utils.datastructures import SortedDict +import copy # Size of each "chunk" for get_iterator calls. # Larger values are slightly faster at the expense of more storage space. @@ -17,12 +19,90 @@ def ensure_default_manager(sender): # Create the default manager, if needed. if hasattr(cls, 'objects'): raise ValueError, "Model %s must specify a custom Manager, because it has a field named 'objects'" % name + cls.add_to_class('objects', Manager()) cls.objects._prepare() dispatcher.connect(ensure_default_manager, signal=signals.class_prepared) -class Manager(object): +class Manager(QuerySet): + # Tracks each time a Manager instance is created. Used to retain order. + creation_counter = 0 + + def __init__(self): + super(Manager, self).__init__() + # Increase the creation counter, and save our local copy. 
+ self.creation_counter = Manager.creation_counter + Manager.creation_counter += 1 + self.klass = None + + def _prepare(self): + pass + # TODO + #if self.klass._meta.get_latest_by: + # self.get_latest = self.__get_latest + #for f in self.klass._meta.fields: + # if isinstance(f, DateField): + # setattr(self, 'get_%s_list' % f.name, curry(self.__get_date_list, f)) + + def contribute_to_class(self, klass, name): + # TODO: Use weakref because of possible memory leak / circular reference. + self.klass = klass + dispatcher.connect(self._prepare, signal=signals.class_prepared, sender=klass) + setattr(klass, name, ManagerDescriptor(self)) + if not hasattr(klass, '_default_manager') or self.creation_counter < klass._default_manager.creation_counter: + klass._default_manager = self + + def get(self, **kwargs): + """Gets a single object, using a new query. Keyword arguments are filters.""" + obj_list = list(self.filter(**kwargs)) + if len(obj_list) < 1: + raise self.klass.DoesNotExist, "%s does not exist for %s" % (self.klass._meta.object_name, kwargs) + assert len(obj_list) == 1, "get_object() returned more than one %s -- it returned %s! Lookup parameters were %s" % (self.klass._meta.object_name, len(obj_list), kwargs) + return obj_list[0] + + def in_bulk(self, id_list, **kwargs): + assert isinstance(id_list, list), "in_bulk() must be provided with a list of IDs." + assert id_list != [], "in_bulk() cannot be passed an empty ID list." + new_query = self # we have to do a copy later, so this is OK + if kwargs: + new_query = self.filter(**kwargs) + new_query = new_query.extras(where= + ["%s.%s IN (%s)" % (backend.quote_name(self.klass._meta.db_table), + backend.quote_name(self.klass._meta.pk.column), + ",".join(['%s'] * len(id_list)))], + params=id_list) + obj_list = list(new_query) + return dict([(obj._get_pk_val(), obj) for obj in obj_list]) + + def delete(self, **kwargs): + # Remove the DELETE_ALL argument, if it exists. 
+ delete_all = kwargs.pop('DELETE_ALL', False) + + # Check for at least one query argument. + if not kwargs and not delete_all: + raise TypeError, "SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data." + + if kwargs: + del_query = self.filter(**kwargs) + else: + del_query = self._clone() + # disable non-supported fields + del_query._select_related = False + del_query._select = {} + del_query._order_by = [] + del_query._offset = None + del_query._limit = None + + opts = self.klass._meta + + # Perform the SQL delete + cursor = connection.cursor() + _, sql, params = del_query._get_sql_clause(False) + cursor.execute("DELETE " + sql, params) + + +class OldManager(object): # Tracks each time a Manager instance is created. Used to retain order. creation_counter = 0 @@ -279,6 +359,9 @@ class Manager(object): # objects -- MySQL returns the values as strings, instead. return [typecast_timestamp(str(row[0])) for row in cursor.fetchall()] +# DEBUG - to go back to old manager: +# Manager = OldManager + class ManagerDescriptor(object): def __init__(self, manager): self.manager = manager @@ -286,4 +369,11 @@ class ManagerDescriptor(object): def __get__(self, instance, type=None): if instance != None: raise AttributeError, "Manager isn't accessible via %s instances" % type.__name__ - return self.manager + + # HACK + # We need a new instance every time. Otherwise, the cache that + # the manager keeps never gets dropped, which is pain for memory usage, + # and concurrency and means that queries don't get updated when you do + # a model_obj.save(). 
(This hack helps some tests to pass, but isn't a real fix) + #return self.manager.__class__() + return copy.deepcopy(self.manager) diff --git a/django/db/models/query.py b/django/db/models/query.py index 328ac2f3cd..ebeb11faad 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -1,9 +1,14 @@ from django.db import backend, connection from django.db.models.fields import FieldDoesNotExist from django.utils.datastructures import SortedDict +import copy LOOKUP_SEPARATOR = '__' +# Size of each "chunk" for get_iterator calls. +# Larger values are slightly faster at the expense of more storage space. +GET_ITERATOR_CHUNK_SIZE = 100 + #################### # HELPER FUNCTIONS # #################### @@ -46,6 +51,259 @@ def orderlist2sql(order_list, opts, prefix=''): output.append('%s%s ASC' % (prefix, backend.quote_name(orderfield2column(f, opts)))) return ', '.join(output) +class QuerySet(object): + "Represents a lazy database lookup for a set of objects" + # Sub classes need to provide 'opts' member for this class + # to be able to function. 
+ def __init__(self): + self._filter = Q() + self._order_by = () + self._select_related = False + self._distinct = True + self._result_cache = None + self._params = None + self._select = None + self._where = None + self._tables = None + self._offset = None + self._limit = None + + def filter(self, **kwargs): + """Returns a new query instance with the query arguments + ANDed to the existing set""" + clone = self._clone() + clone._filter = self._filter & Q(**kwargs) + return clone + + def unique(self, true_or_false): + """Returns a new query instance with the 'unique' qualifier modified""" + return self._clone(_distinct=true_or_false) + + def order_by(self, *field_names): + """Returns a new query instance with the ordering changed.""" + return self._clone(_order_by=field_names) + + def select_related(self, true_or_false): + """Returns a new query instance with the 'related' qualifier modified""" + return self._clone(_related=true_or_false) + + def count(self): + counter = self._clone() + counter._order_by = [] + + # TODO - do we change these or not? + # e.g. 
if someone does objects[0:10].count() + # (which +#counter._offset = None +#counter._limit = None + counter._select_related = False + _, sql, params = counter._get_sql_clause(True) + cursor = connection.cursor() + cursor.execute("SELECT COUNT(*)" + sql, params) + return cursor.fetchone()[0] + + # Convenience function for subclasses + def _set_core_filter(self, filter_dict): + """Sets the filters that should always be applied to queries""" + self._filter = Q(**filter_dict) + + + def _clone(self, **kwargs): + """Gets a clone of the object, with optional kwargs to alter the clone""" + # Don't clone (even temporarily) the cache + _result_cache_save = self._result_cache + self._result_cache = None + # Must ensure we get fully deep copies of all the query objects + clone = copy.deepcopy(self) + # apply changes to clone + clone.__dict__.update(kwargs) + # restore cache + self._result_cache = _result_cache_save + return clone + + def _ensure_compatible(self, other): + if self._distinct != other._distinct: + raise ValueError, "Can't combine a unique query with a non-unique query" + + def _combine(self, other): + self._ensure_compatible(other) + # get a deepcopy of 'other's order by + # (so that A.filter(args1) & A.filter(args2) does the same as + # A.filter(args1).filter(args2) + combined = other._clone() + # If 'self' is ordered and 'other' isn't, propagate 'self's ordering + if len(self._order_by) > 0 and len(combined._order_by) == 0: + combined._order_by = copy.deepcopy(self._order_by) + return combined + + def extras(self, params=None, select=None, where=None, tables=None): + return self._clone(_params=params, _select=select, _where=where, _tables=tables) + + def __and__(self, other): + combined = self._combine(other) + combined._filter = self._filter & other._filter + return combined + + def __or__(self, other): + combined = self._combine(other) + combined._filter = self._filter | other._filter + return combined + + # TODO - allow_joins - do we need it?
+ def _get_sql_clause(self, allow_joins): + def quote_only_if_word(word): + if ' ' in word: + return word + else: + return backend.quote_name(word) + + # This is defined by sub-classes + # TODO - define a better accessor + opts = self.klass._meta + + # Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z. + select = ["%s.%s" % (backend.quote_name(opts.db_table), backend.quote_name(f.column)) for f in opts.fields] + + tables = [quote_only_if_word(t) for t in (self._tables or [])] + joins = SortedDict() + where = self._where or [] + params = self._params or [] + + # Convert the Q object into SQL. + tables2, joins2, where2, params2 = self._filter.get_sql(opts) + + tables.extend(tables2) + joins.update(joins2) + where.extend(where2) + params.extend(params2) + + # Add additional tables and WHERE clauses based on select_related. + if self._select_related is True: + fill_table_cache(opts, select, tables, where, opts.db_table, [opts.db_table]) + + # Add any additional SELECTs. + if self._select: + select.extend(['(%s) AS %s' % (quote_only_if_word(s[1]), backend.quote_name(s[0])) for s in self._select ]) + + # Start composing the body of the SQL statement. + sql = [" FROM", backend.quote_name(opts.db_table)] + + # Check if extra tables are allowed. If not, throw an error + if (tables or joins) and not allow_joins: + raise TypeError, "Joins are not allowed in this type of query" + + # Compose the join dictionary into SQL describing the joins. + if joins: + sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition) + for (alias, (table, join_type, condition)) in joins.items()])) + + # Compose the tables clause into SQL. + if tables: + sql.append(", " + ", ".join(tables)) + + # Compose the where clause into SQL. + if where: + sql.append(where and "WHERE " + " AND ".join(where)) + + # ORDER BY clause + order_by = [] + for f in handle_legacy_orderlist(self._order_by): + if f == '?': # Special case. 
+ order_by.append(backend.get_random_function_sql()) + else: + if f.startswith('-'): + col_name = f[1:] + order = "DESC" + else: + col_name = f + order = "ASC" + if "." in col_name: + table_prefix, col_name = col_name.split('.', 1) + table_prefix = backend.quote_name(table_prefix) + '.' + else: + # Use the database table as a column prefix if it wasn't given, + # and if the requested column isn't a custom SELECT. + if "." not in col_name and col_name not in [k[0] for k in (self._select or []) ]: + table_prefix = backend.quote_name(opts.db_table) + '.' + else: + table_prefix = '' + order_by.append('%s%s %s' % (table_prefix, backend.quote_name(orderfield2column(col_name, opts)), order)) + if order_by: + sql.append("ORDER BY " + ", ".join(order_by)) + + # LIMIT and OFFSET clauses + if self._limit is not None: + sql.append("%s " % backend.get_limit_offset_sql(self._limit, self._offset)) + else: + assert self._offset is None, "'offset' is not allowed without 'limit'" + + return select, " ".join(sql), params + + def _fetch_data(self): + if self._result_cache is None: + self._result_cache = list(self.get_iterator()) + + def __iter__(self): + """Gets an iterator for the data""" + # Fetch the data or use get_iterator? If not, we can't + # do sequence operations - or doing so will require re-fetching + # Also, lots of things in current template system break if + # don't get it all. + self._fetch_data() + return iter(self._result_cache) + + def __len__(self): + self._fetch_data() + return len(self._result_cache) + + def __getitem__(self, k): + """Retrieve an item or slice from the set of results""" + # getitem can't return query instances, because .filter() + # and .order_by() methods on the result would break badly. 
+ # This means we don't have to worry about arithmetic with + # self._limit or self._offset - they will both be None + # at this point + if isinstance(k, slice): + # Get a new query if we haven't already got data from db + if self._result_cache is None: + # slice.stop and slice.start + clone = self._clone(_offset=k.start, _limit=k.stop) + return list(clone)[::k.step] + # TODO - we are throwing away this retrieved data. + # We could cache it if we had some kind of sparse + # list structure we could put it in. + else: + return self._result_cache[k] + + else: + # TODO: possibly use a new query which just gets one item + # if we haven't already got them all? + self._fetch_data() + return self._result_cache[k] + + def get_iterator(self): + # self._select is a dictionary, and dictionaries' key order is + # undefined, so we convert it to a list of tuples. + _extra_select = (self._select or {}).items() + + cursor = connection.cursor() + select, sql, params = self._get_sql_clause(True) + cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params) + fill_cache = self._select_related + index_end = len(self.klass._meta.fields) + while 1: + rows = cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE) + if not rows: + raise StopIteration + for row in rows: + if fill_cache: + obj, index_end = get_cached_row(self.klass, row, 0) + else: + obj = self.klass(*row[:index_end]) + for i, k in enumerate(_extra_select): + setattr(obj, k[0], row[index_end+i]) + yield obj + class QOperator: "Base class for QAnd and QOr" def __init__(self, *args): @@ -99,7 +357,7 @@ class QOr(QOperator): raise TypeError, other class Q: - "Encapsulates queries for the 'complex' parameter to Django API functions." + "Encapsulates queries as objects that can be combined logically." def __init__(self, **kwargs): self.kwargs = kwargs @@ -192,42 +450,38 @@ def parse_lookup(kwarg_items, opts): # there for others to implement custom Q()s, etc that return other join # types. 
tables, joins, where, params = [], SortedDict(), [], [] + for kwarg, value in kwarg_items: - if kwarg in ('order_by', 'limit', 'offset', 'select_related', 'distinct', 'select', 'tables', 'where', 'params'): + if value is None: pass - elif value is None: - pass - elif kwarg == 'complex': - if not hasattr(value, 'get_sql'): - raise TypeError, "'%s' is not a valid query argument" % str(arg) - tables2, joins2, where2, params2 = value.get_sql(opts) - tables.extend(tables2) - joins.update(joins2) - where.extend(where2) - params.extend(params2) - else: # Must be a search parameter. + else: path = kwarg.split(LOOKUP_SEPARATOR) - # Extract the last elements of the kwarg. # The very-last is the clause (equals, like, etc). # The second-last is the table column on which the clause is # to be performed. - # The only exception to this is "pk", which is an implicit - # id__exact; if we find "pk", make the clause "exact', and - # insert a dummy name of None, which we will replace when - # we know which table column to grab as the primary key. + # The exceptions to this are: + # 1) "pk", which is an implicit id__exact; + # if we find "pk", make the clause "exact', and insert + # a dummy name of None, which we will replace when + # we know which table column to grab as the primary key. 
+ # 2) If there is only one part, assume it to be an __exact clause = path.pop() if clause == 'pk': clause = 'exact' path.append(None) + elif len(path) == 0: + path.append(clause) + clause = 'exact' + if len(path) < 1: raise TypeError, "Cannot parse keyword query %r" % kwarg - tables2, joins2, where2, params2 = lookup_inner(path, clause, value, opts, opts.db_table, None) - tables.extend(tables2) - joins.update(joins2) - where.extend(where2) - params.extend(params2) + tables2, joins2, where2, params2 = lookup_inner(path, clause, value, opts, opts.db_table, None) + tables.extend(tables2) + joins.update(joins2) + where.extend(where2) + params.extend(params2) return tables, joins, where, params class FieldFound(Exception):