queryset-refactor: More whack-a-mole optimisation work. Still got a couple of big spots to go, though.

git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7253 bcc190cf-cafb-0310-a4f2-bffc1f526a37
2025-07-05 18:29:11 +00:00 · 2008-03-16 16:18:39 +00:00 · 2008-03-16 16:18:39 +00:00 · 067d380e98
commit 067d380e98
parent 50a1880100
6 changed files with 67 additions and 84 deletions
--- a/django/db/models/fields/__init__.py
+++ b/django/db/models/fields/__init__.py
@ -160,11 +160,10 @@ class Field(object):
        # mapped to one of the built-in Django field types. In this case, you
        # can implement db_type() instead of get_internal_type() to specify
        # exactly which wacky database column type you want to use.
-        data_types = get_creation_module().DATA_TYPES
+        try:
-        internal_type = self.get_internal_type()
+            return get_creation_module().DATA_TYPES[self.get_internal_type()] % self.__dict__
-        if internal_type not in data_types:
+        except KeyError:
            return None
        return data_types[internal_type] % self.__dict__
    def validate_full(self, field_data, all_data):
        """
--- a/django/db/models/options.py
+++ b/django/db/models/options.py
@ -123,6 +123,7 @@ class Options(object):
            self.setup_pk(field)
            if hasattr(self, '_field_cache'):
                del self._field_cache
                del self._field_name_cache
        if hasattr(self, '_name_map'):
            del self._name_map
@ -155,17 +156,20 @@ class Options(object):
        """
        The getter for self.fields. This returns the list of field objects
        available to this model (including through parent models).
        Callers are not permitted to modify this list, since it's a reference
        to this instance (not a copy).
        """
        try:
-            self._field_cache
+            self._field_name_cache
        except AttributeError:
            self._fill_fields_cache()
-        return self._field_cache.keys()
+        return self._field_name_cache
    fields = property(_fields)
    def get_fields_with_model(self):
        """
-        Returns a list of (field, model) pairs for all fields. The "model"
+        Returns a sequence of (field, model) pairs for all fields. The "model"
        element is None for fields on the current model. Mostly of use when
        constructing queries so that we know which model a field belongs to.
        """
@ -173,19 +177,19 @@ class Options(object):
            self._field_cache
        except AttributeError:
            self._fill_fields_cache()
-        return self._field_cache.items()
+        return self._field_cache
    def _fill_fields_cache(self):
-        cache = SortedDict()
+        cache = []
        for parent in self.parents:
            for field, model in parent._meta.get_fields_with_model():
                if model:
-                    cache[field] = model
+                    cache.append((field, model))
                else:
-                    cache[field] = parent
+                    cache.append((field, parent))
-        for field in self.local_fields:
+        cache.extend([(f, None) for f in self.local_fields])
-            cache[field] = None
+        self._field_cache = tuple(cache)
-        self._field_cache = cache
+        self._field_name_cache = [x for x, _ in cache]
    def _many_to_many(self):
        try:
--- a/django/db/models/query.py
+++ b/django/db/models/query.py
@ -31,10 +31,9 @@ class _QuerySet(object):
        return repr(list(self))
    def __len__(self):
-        # Since __len__ is called quite frequently (as part of list(qs), which
+        # Since __len__ is called quite frequently (for example, as part of
-        # means as part of qs.get(), for example), we make some effort here to
+        # list(qs)), we make some effort here to be as efficient as possible
-        # be as efficient as possible whilst not messing up any existing
+        # whilst not messing up any existing iterators against the queryset.
        # iterators against the queryset.
        if self._result_cache is None:
            if self._iter:
                self._result_cache = list(self._iter())
@ -50,6 +49,8 @@ class _QuerySet(object):
            self._result_cache = []
        if self._iter:
            return self._result_iter()
        # Python's list iterator is better than our version when we're just
        # iterating over the cache.
        return iter(self._result_cache)
    def _result_iter(self):
--- a/django/db/models/sql/query.py
+++ b/django/db/models/sql/query.py
@ -7,6 +7,7 @@ databases). The abstraction barrier only works one way: this module has to know
 all about the internals of models in order to get the information it needs.
 """
 import itertools
 from copy import deepcopy
 from django.utils.tree import Node
@ -223,13 +224,13 @@ class Query(object):
        if where:
            result.append('WHERE %s' % where)
            params.extend(w_params)
        if self.extra_where:
            if not where:
                result.append('WHERE')
            else:
                result.append('AND')
            result.append(' AND'.join(self.extra_where))
        params.extend(w_params)
        if self.group_by:
            grouping = self.get_grouping()
@ -361,39 +362,40 @@ class Query(object):
                    if hasattr(col, 'alias'):
                        aliases.append(col.alias)
        elif self.default_cols:
-            result = self.get_default_columns(lambda x, y: "%s.%s" % (qn(x), qn(y)))
+            result = self.get_default_columns(True)
            aliases = result[:]
        result.extend(['(%s) AS %s' % (col, alias)
                for alias, col in self.extra_select.items()])
        aliases.extend(self.extra_select.keys())
-        self._select_aliases = dict.fromkeys(aliases)
+        self._select_aliases = set(aliases)
        return result
-    def get_default_columns(self, combine_func=None):
+    def get_default_columns(self, as_str=False):
        """
        Computes the default columns for selecting every field in the base
        model. Returns a list of default (alias, column) pairs suitable for
-        direct inclusion as the select columns. The 'combine_func' can be
+        inclusion as the select columns. If 'as_str' is True, returns a list of
-        passed in to change the returned data set to a list of some other
+        strings, quoted appropriately for use in SQL directly.
        structure.
        """
        # Note: We allow 'combine_func' here because this method is called a
        # lot. The extra overhead from returning a list and then transforming
        # it in get_columns() hurt performance in a measurable way.
        result = []
        table_alias = self.tables[0]
        root_pk = self.model._meta.pk.column
        seen = {None: table_alias}
        qn = self.quote_name_unless_alias
        qn2 = self.connection.ops.quote_name
        for field, model in self.model._meta.get_fields_with_model():
-            if model not in seen:
+            try:
-                seen[model] = self.join((table_alias, model._meta.db_table,
+                alias = seen[model]
            except KeyError:
                alias = self.join((table_alias, model._meta.db_table,
                        root_pk, model._meta.pk.column))
-            if combine_func:
+                seen[model] = alias
-                result.append(combine_func(seen[model], field.column))
+            if as_str:
                result.append('%s.%s' % (qn(alias), qn2(field.column)))
            else:
-                result.append((seen[model], field.column))
+                result.append((alias, field.column))
        return result
    def get_from_clause(self):
@ -897,7 +899,7 @@ class Query(object):
            # from any previous joins (ref count is 1 in the table list), we
            # make the new additions (and any existing ones not used in the new
            # join list) an outer join.
-            join_it = nested_iter(join_list)
+            join_it = itertools.chain(*join_list)
            table_it = iter(self.tables)
            join_it.next(), table_it.next()
            for join in join_it:
@ -1325,21 +1327,19 @@ class Query(object):
                raise EmptyResultSet
        except EmptyResultSet:
            if result_type == MULTI:
-                raise StopIteration
+                return empty_iter()
            else:
                return
        cursor = self.connection.cursor()
        cursor.execute(sql, params)
-        if result_type is None:
+        if not result_type:
            return cursor
        if result_type == SINGLE:
            return cursor.fetchone()
        # The MULTI case.
-        return results_iter(cursor)
+        return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)), [])
 def get_order_dir(field, default='ASC'):
    """
@ -1354,24 +1354,11 @@ def get_order_dir(field, default='ASC'):
        return field[1:], dirn[1]
    return field, dirn[0]
-def results_iter(cursor):
+def empty_iter():
    """
-    An iterator over the result set that returns a chunk of rows at a time.
+    Returns an iterator containing no results.
    """
-    while 1:
+    yield iter([]).next()
        rows = cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)
        if not rows:
            raise StopIteration
        yield rows
 def nested_iter(nested):
    """
    An iterator over a sequence of sequences. Each element is returned in turn.
    Only handles one level of nesting, since that's all we need here.
    """
    for seq in nested:
        for elt in seq:
            yield elt
 def setup_join_cache(sender):
    """
--- a/django/db/models/sql/subqueries.py
+++ b/django/db/models/sql/subqueries.py
@ -238,40 +238,29 @@ class UpdateQuery(Query):
 class InsertQuery(Query):
    def __init__(self, *args, **kwargs):
        super(InsertQuery, self).__init__(*args, **kwargs)
        self._setup_query()
    def _setup_query(self):
        """
        Run on initialisation and after cloning.
        """
        self.columns = []
        self.values = []
        self.params = ()
    def clone(self, klass=None, **kwargs):
        extras = {'columns': self.columns[:], 'values': self.values[:],
                'params': self.params}
        return super(InsertQuery, self).clone(klass, extras)
    def as_sql(self):
-        self.select_related = False
+        # We don't need quote_name_unless_alias() here, since these are all
-        self.pre_sql_setup()
+        # going to be column names (so we can avoid the extra overhead).
-        qn = self.quote_name_unless_alias
+        qn = self.connection.ops.quote_name
-        result = ['INSERT INTO %s' % qn(self.tables[0])]
+        result = ['INSERT INTO %s' % qn(self.model._meta.db_table)]
        result.append('(%s)' % ', '.join([qn(c) for c in self.columns]))
-        result.append('VALUES (')
+        result.append('VALUES (%s)' % ', '.join(self.values))
-        params = []
+        return ' '.join(result), self.params
        first = True
        for value in self.values:
            prefix = not first and ', ' or ''
            if isinstance(value, RawValue):
                result.append('%s%s' % (prefix, value.value))
            else:
                result.append('%s%%s' % prefix)
                params.append(value)
            first = False
        result.append(')')
        return ' '.join(result), tuple(params)
    def execute_sql(self, return_id=False):
        cursor = super(InsertQuery, self).execute_sql(None)
        if return_id:
-            return self.connection.ops.last_insert_id(cursor, self.tables[0],
+            return self.connection.ops.last_insert_id(cursor,
-                    self.model._meta.pk.column)
+                    self.model._meta.db_table, self.model._meta.pk.column)
    def insert_values(self, insert_values, raw_values=False):
        """
@ -285,16 +274,18 @@ class InsertQuery(Query):
        """
        func = lambda x: self.model._meta.get_field_by_name(x)[0].column
        # keys() and values() return items in the same order, providing the
-        # dictionary hasn't changed between calls. So these lines work as
+        # dictionary hasn't changed between calls. So the dual iteration here
-        # intended.
+        # works as intended.
        for name in insert_values:
            if name == 'pk':
                name = self.model._meta.pk.name
            self.columns.append(func(name))
        if raw_values:
            self.values.extend([RawValue(v) for v in insert_values.values()])
        else:
            self.values.extend(insert_values.values())
        else:
            values = insert_values.values()
            self.params += tuple(values)
            self.values.extend(['%s'] * len(values))
 class DateQuery(Query):
    """
--- a/tests/modeltests/model_inheritance/models.py
+++ b/tests/modeltests/model_inheritance/models.py
@ -263,5 +263,6 @@ True
 <Chef: Albert the chef>
 >>> len(db.connection.queries)
 3
 >>> settings.DEBUG = False
 """}