Mirror of https://github.com/django/django.git (synced 2025-07-05 18:29:11 +00:00)
queryset-refactor: Converted the queryset iterator into a real iterator that populates the result cache only on demand. For efficiency, the cache is filled 100 elements at a time rather than one at a time; this is a real win when the result set contains, say, 10,000 objects. This also provides an efficient boolean (__nonzero__) test that doesn't use up a lot of memory if you never read all the results. Refs #2430, #5987.

git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7030 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent 98abf27535
commit dd2251a653
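Before the diff, here is a minimal standalone sketch of the caching pattern the patch introduces, assuming a plain in-memory iterator in place of a database cursor. The class name ChunkedResults and its produce_rows callable are illustrative inventions, not Django API, and the Python 3 spellings (__bool__, next()) stand in for the patch's Python 2 idioms (__nonzero__, .next()):

# A standalone model of the on-demand result cache this patch introduces.
# Not Django code: the "database" is a plain iterator supplied by the caller.

ITER_CHUNK_SIZE = 100

class ChunkedResults(object):
    def __init__(self, produce_rows):
        self._produce_rows = produce_rows  # zero-arg callable returning an iterator
        self._result_cache = None          # None means "no query has run yet"
        self._iter = None                  # live source iterator while results remain

    def __iter__(self):
        pos = 0
        if self._result_cache is None:
            self._iter = self._produce_rows()
            self._result_cache = []
        while True:
            # Serve everything already cached; re-check len() on each pass so
            # parallel iterators over the same object see each other's fills.
            while pos < len(self._result_cache):
                yield self._result_cache[pos]
                pos += 1
            if self._iter is None:
                return                     # source exhausted, cache is complete
            self._fill_cache()

    def __bool__(self):
        # A truth test touches at most one chunk instead of the full result set.
        if self._result_cache is None:
            try:
                next(iter(self))
            except StopIteration:
                return False
        return True

    def _fill_cache(self, num=None):
        # Append up to 'num' (default: one chunk of) entries to the cache.
        if self._iter:
            try:
                for _ in range(num or ITER_CHUNK_SIZE):
                    self._result_cache.append(next(self._iter))
            except StopIteration:
                self._iter = None

results = ChunkedResults(lambda: iter(range(250)))
assert bool(results)                       # fetches only the first chunk
assert len(results._result_cache) == ITER_CHUNK_SIZE
assert list(results) == list(range(250))   # drains the rest on demand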
@@ -21,6 +21,7 @@ except NameError:
 # Used to control how many objects are worked with at once in some cases (e.g.
 # when deleting objects).
 CHUNK_SIZE = 100
+ITER_CHUNK_SIZE = CHUNK_SIZE
 
 class _QuerySet(object):
     "Represents a lazy database lookup for a set of objects"
@@ -28,19 +29,40 @@ class _QuerySet(object):
         self.model = model
         self.query = query or sql.Query(self.model, connection)
         self._result_cache = None
+        self._iter = None
 
     ########################
     # PYTHON MAGIC METHODS #
     ########################
 
     def __repr__(self):
-        return repr(self._get_data())
+        return repr(list(iter(self)))
 
     def __len__(self):
-        return len(self._get_data())
+        return len(list(iter(self)))
 
     def __iter__(self):
-        return iter(self._get_data())
+        pos = 0
+        if self._result_cache is None:
+            self._iter = self.iterator()
+            self._result_cache = []
+        while 1:
+            upper = len(self._result_cache)
+            while pos < upper:
+                yield self._result_cache[pos]
+                pos = pos + 1
+            if not self._iter:
+                raise StopIteration
+            if len(self._result_cache) <= pos:
+                self._fill_cache()
+
+    def __nonzero__(self):
+        if self._result_cache is None:
+            try:
+                iter(self).next()
+            except StopIteration:
+                return False
+        return True
 
     def __getitem__(self, k):
         "Retrieve an item or slice from the set of results."
@@ -52,6 +74,15 @@ class _QuerySet(object):
             "Negative indexing is not supported."
 
+        if self._result_cache is not None:
+            if self._iter is not None:
+                # The result cache has only been partially populated, so we may
+                # need to fill it out a bit more.
+                if isinstance(k, slice):
+                    bound = k.stop
+                else:
+                    bound = k + 1
+                if len(self._result_cache) < bound:
+                    self._fill_cache(bound - len(self._result_cache))
+            return self._result_cache[k]
+
         if isinstance(k, slice):
@@ -375,10 +406,17 @@ class _QuerySet(object):
         c._setup_query()
         return c
 
-    def _get_data(self):
-        if self._result_cache is None:
-            self._result_cache = list(self.iterator())
-        return self._result_cache
+    def _fill_cache(self, num=None):
+        """
+        Fills the result cache with 'num' more entries (or until the results
+        iterator is exhausted).
+        """
+        if self._iter:
+            try:
+                for i in range(num or ITER_CHUNK_SIZE):
+                    self._result_cache.append(self._iter.next())
+            except StopIteration:
+                self._iter = None
 
 # Use the backend's QuerySet class if it defines one. Otherwise, use _QuerySet.
 if connection.features.uses_custom_queryset:
@@ -395,6 +433,9 @@ class ValuesQuerySet(QuerySet):
     # QuerySet.clone() will also set up the _fields attribute with the
     # names of the model fields to select.
 
+    def __iter__(self):
+        return self.iterator()
+
     def iterator(self):
         self.field_names.extend([f for f in self.query.extra_select.keys()])
         for row in self.query.results_iter():
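One more illustration before the test changes: the new __getitem__ branch fills the cache just far enough to satisfy an index or bounded slice, rather than reading everything. A sketch of the same behavior on the standalone model above (the getitem helper is hypothetical, written only to mirror the patch's branch, and assumes a bounded index or slice):

# Hypothetical getitem helper mirroring the __getitem__ branch added above;
# it tops up the ChunkedResults cache just far enough to satisfy the request.
def getitem(results, k):
    if results._result_cache is None:
        # No query has run yet: start the source and an empty cache.
        results._iter = results._produce_rows()
        results._result_cache = []
    if results._iter is not None:
        # Work out how many entries the request needs, then fill exactly that.
        bound = k.stop if isinstance(k, slice) else k + 1
        if len(results._result_cache) < bound:
            results._fill_cache(bound - len(results._result_cache))
    return results._result_cache[k]

tags = ChunkedResults(lambda: iter(['t1', 't2', 't3', 't4', 't5']))
assert getitem(tags, 4) == 't5'
assert len(tags._result_cache) == 5   # filled exactly as far as needed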
@@ -501,5 +501,42 @@ Bug #6203
 2
 >>> len(Item.objects.dates('created', 'day'))
 2
+
+Test that parallel iterators work.
+
+>>> qs = Tag.objects.all()
+>>> i1, i2 = iter(qs), iter(qs)
+>>> i1.next(), i1.next()
+(<Tag: t1>, <Tag: t2>)
+>>> i2.next(), i2.next(), i2.next()
+(<Tag: t1>, <Tag: t2>, <Tag: t3>)
+>>> i1.next()
+<Tag: t3>
+
+We can do slicing beyond what is currently in the result cache, too.
+
+# We need to mess with the implementation internals a bit here to decrease the
+# cache fill size so that we don't read all the results at once.
+>>> from django.db.models import query
+>>> query.ITER_CHUNK_SIZE = 2
+>>> qs = Tag.objects.all()
+
+# Fill the cache with the first chunk.
+>>> bool(qs)
+True
+>>> len(qs._result_cache)
+2
+
+# Query beyond the end of the cache and check that it is filled out as required.
+>>> qs[4]
+<Tag: t5>
+>>> len(qs._result_cache)
+5
+
+# But querying beyond the end of the result set will fail.
+>>> qs[100]
+Traceback (most recent call last):
+...
+IndexError: ...
 """}