1
0
mirror of https://github.com/django/django.git synced 2025-07-05 18:29:11 +00:00

queryset-refactor: More whack-a-mole optimisation work. Still got a couple of big spots to go, though.

git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7253 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Malcolm Tredinnick 2008-03-16 16:18:39 +00:00
parent 50a1880100
commit 067d380e98
6 changed files with 67 additions and 84 deletions

View File

@ -160,11 +160,10 @@ class Field(object):
# mapped to one of the built-in Django field types. In this case, you
# can implement db_type() instead of get_internal_type() to specify
# exactly which wacky database column type you want to use.
data_types = get_creation_module().DATA_TYPES
internal_type = self.get_internal_type()
if internal_type not in data_types:
try:
return get_creation_module().DATA_TYPES[self.get_internal_type()] % self.__dict__
except KeyError:
return None
return data_types[internal_type] % self.__dict__
def validate_full(self, field_data, all_data):
"""

View File

@ -123,6 +123,7 @@ class Options(object):
self.setup_pk(field)
if hasattr(self, '_field_cache'):
del self._field_cache
del self._field_name_cache
if hasattr(self, '_name_map'):
del self._name_map
@ -155,17 +156,20 @@ class Options(object):
"""
The getter for self.fields. This returns the list of field objects
available to this model (including through parent models).
Callers are not permitted to modify this list, since it's a reference
to this instance (not a copy).
"""
try:
self._field_cache
self._field_name_cache
except AttributeError:
self._fill_fields_cache()
return self._field_cache.keys()
return self._field_name_cache
fields = property(_fields)
def get_fields_with_model(self):
"""
Returns a list of (field, model) pairs for all fields. The "model"
Returns a sequence of (field, model) pairs for all fields. The "model"
element is None for fields on the current model. Mostly of use when
constructing queries so that we know which model a field belongs to.
"""
@ -173,19 +177,19 @@ class Options(object):
self._field_cache
except AttributeError:
self._fill_fields_cache()
return self._field_cache.items()
return self._field_cache
def _fill_fields_cache(self):
cache = SortedDict()
cache = []
for parent in self.parents:
for field, model in parent._meta.get_fields_with_model():
if model:
cache[field] = model
cache.append((field, model))
else:
cache[field] = parent
for field in self.local_fields:
cache[field] = None
self._field_cache = cache
cache.append((field, parent))
cache.extend([(f, None) for f in self.local_fields])
self._field_cache = tuple(cache)
self._field_name_cache = [x for x, _ in cache]
def _many_to_many(self):
try:

View File

@ -31,10 +31,9 @@ class _QuerySet(object):
return repr(list(self))
def __len__(self):
# Since __len__ is called quite frequently (as part of list(qs), which
# means as part of qs.get(), for example), we make some effort here to
# be as efficient as possible whilst not messing up any existing
# iterators against the queryset.
# Since __len__ is called quite frequently (for example, as part of
# list(qs)), we make some effort here to be as efficient as possible
# whilst not messing up any existing iterators against the queryset.
if self._result_cache is None:
if self._iter:
self._result_cache = list(self._iter())
@ -50,6 +49,8 @@ class _QuerySet(object):
self._result_cache = []
if self._iter:
return self._result_iter()
# Python's list iterator is better than our version when we're just
# iterating over the cache.
return iter(self._result_cache)
def _result_iter(self):

View File

@ -7,6 +7,7 @@ databases). The abstraction barrier only works one way: this module has to know
all about the internals of models in order to get the information it needs.
"""
import itertools
from copy import deepcopy
from django.utils.tree import Node
@ -223,13 +224,13 @@ class Query(object):
if where:
result.append('WHERE %s' % where)
params.extend(w_params)
if self.extra_where:
if not where:
result.append('WHERE')
else:
result.append('AND')
result.append(' AND'.join(self.extra_where))
params.extend(w_params)
if self.group_by:
grouping = self.get_grouping()
@ -361,39 +362,40 @@ class Query(object):
if hasattr(col, 'alias'):
aliases.append(col.alias)
elif self.default_cols:
result = self.get_default_columns(lambda x, y: "%s.%s" % (qn(x), qn(y)))
result = self.get_default_columns(True)
aliases = result[:]
result.extend(['(%s) AS %s' % (col, alias)
for alias, col in self.extra_select.items()])
aliases.extend(self.extra_select.keys())
self._select_aliases = dict.fromkeys(aliases)
self._select_aliases = set(aliases)
return result
def get_default_columns(self, combine_func=None):
def get_default_columns(self, as_str=False):
"""
Computes the default columns for selecting every field in the base
model. Returns a list of default (alias, column) pairs suitable for
direct inclusion as the select columns. The 'combine_func' can be
passed in to change the returned data set to a list of some other
structure.
inclusion as the select columns. If 'as_str' is True, returns a list of
strings, quoted appropriately for use in SQL directly.
"""
# Note: We allow 'combine_func' here because this method is called a
# lot. The extra overhead from returning a list and then transforming
# it in get_columns() hurt performance in a measurable way.
result = []
table_alias = self.tables[0]
root_pk = self.model._meta.pk.column
seen = {None: table_alias}
qn = self.quote_name_unless_alias
qn2 = self.connection.ops.quote_name
for field, model in self.model._meta.get_fields_with_model():
if model not in seen:
seen[model] = self.join((table_alias, model._meta.db_table,
try:
alias = seen[model]
except KeyError:
alias = self.join((table_alias, model._meta.db_table,
root_pk, model._meta.pk.column))
if combine_func:
result.append(combine_func(seen[model], field.column))
seen[model] = alias
if as_str:
result.append('%s.%s' % (qn(alias), qn2(field.column)))
else:
result.append((seen[model], field.column))
result.append((alias, field.column))
return result
def get_from_clause(self):
@ -897,7 +899,7 @@ class Query(object):
# from any previous joins (ref count is 1 in the table list), we
# make the new additions (and any existing ones not used in the new
# join list) an outer join.
join_it = nested_iter(join_list)
join_it = itertools.chain(*join_list)
table_it = iter(self.tables)
join_it.next(), table_it.next()
for join in join_it:
@ -1325,21 +1327,19 @@ class Query(object):
raise EmptyResultSet
except EmptyResultSet:
if result_type == MULTI:
raise StopIteration
return empty_iter()
else:
return
cursor = self.connection.cursor()
cursor.execute(sql, params)
if result_type is None:
if not result_type:
return cursor
if result_type == SINGLE:
return cursor.fetchone()
# The MULTI case.
return results_iter(cursor)
return iter((lambda: cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)), [])
def get_order_dir(field, default='ASC'):
"""
@ -1354,24 +1354,11 @@ def get_order_dir(field, default='ASC'):
return field[1:], dirn[1]
return field, dirn[0]
def results_iter(cursor):
def empty_iter():
"""
An iterator over the result set that returns a chunk of rows at a time.
Returns an iterator containing no results.
"""
while 1:
rows = cursor.fetchmany(GET_ITERATOR_CHUNK_SIZE)
if not rows:
raise StopIteration
yield rows
def nested_iter(nested):
"""
An iterator over a sequence of sequences. Each element is returned in turn.
Only handles one level of nesting, since that's all we need here.
"""
for seq in nested:
for elt in seq:
yield elt
yield iter([]).next()
def setup_join_cache(sender):
"""

View File

@ -238,40 +238,29 @@ class UpdateQuery(Query):
class InsertQuery(Query):
def __init__(self, *args, **kwargs):
super(InsertQuery, self).__init__(*args, **kwargs)
self._setup_query()
def _setup_query(self):
"""
Run on initialisation and after cloning.
"""
self.columns = []
self.values = []
self.params = ()
def clone(self, klass=None, **kwargs):
extras = {'columns': self.columns[:], 'values': self.values[:],
'params': self.params}
return super(InsertQuery, self).clone(klass, extras)
def as_sql(self):
self.select_related = False
self.pre_sql_setup()
qn = self.quote_name_unless_alias
result = ['INSERT INTO %s' % qn(self.tables[0])]
# We don't need quote_name_unless_alias() here, since these are all
# going to be column names (so we can avoid the extra overhead).
qn = self.connection.ops.quote_name
result = ['INSERT INTO %s' % qn(self.model._meta.db_table)]
result.append('(%s)' % ', '.join([qn(c) for c in self.columns]))
result.append('VALUES (')
params = []
first = True
for value in self.values:
prefix = not first and ', ' or ''
if isinstance(value, RawValue):
result.append('%s%s' % (prefix, value.value))
else:
result.append('%s%%s' % prefix)
params.append(value)
first = False
result.append(')')
return ' '.join(result), tuple(params)
result.append('VALUES (%s)' % ', '.join(self.values))
return ' '.join(result), self.params
def execute_sql(self, return_id=False):
cursor = super(InsertQuery, self).execute_sql(None)
if return_id:
return self.connection.ops.last_insert_id(cursor, self.tables[0],
self.model._meta.pk.column)
return self.connection.ops.last_insert_id(cursor,
self.model._meta.db_table, self.model._meta.pk.column)
def insert_values(self, insert_values, raw_values=False):
"""
@ -285,16 +274,18 @@ class InsertQuery(Query):
"""
func = lambda x: self.model._meta.get_field_by_name(x)[0].column
# keys() and values() return items in the same order, providing the
# dictionary hasn't changed between calls. So these lines work as
# intended.
# dictionary hasn't changed between calls. So the dual iteration here
# works as intended.
for name in insert_values:
if name == 'pk':
name = self.model._meta.pk.name
self.columns.append(func(name))
if raw_values:
self.values.extend([RawValue(v) for v in insert_values.values()])
else:
self.values.extend(insert_values.values())
else:
values = insert_values.values()
self.params += tuple(values)
self.values.extend(['%s'] * len(values))
class DateQuery(Query):
"""

View File

@ -263,5 +263,6 @@ True
<Chef: Albert the chef>
>>> len(db.connection.queries)
3
>>> settings.DEBUG = False
"""}