From dd2251a653f45406a720203c35e40adac5411b5a Mon Sep 17 00:00:00 2001 From: Malcolm Tredinnick Date: Sat, 26 Jan 2008 13:23:54 +0000 Subject: [PATCH] queryset-refactor: Converted the queryset iterator to be a real iterator and only populate the result cache on demand. We actually populate the result cache 100 elements at a time, rather than one at a time for efficiency, but this is a real win when the resultset contains 10,000 objects for example. This also provides an efficient boolean (__nonzero__) test that doesn't use up a lot of memory if you don't read all the results. Refs #2430, #5987. git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7030 bcc190cf-cafb-0310-a4f2-bffc1f526a37 --- django/db/models/query.py | 55 +++++++++++++++++++++---- tests/regressiontests/queries/models.py | 37 +++++++++++++++++ 2 files changed, 85 insertions(+), 7 deletions(-) diff --git a/django/db/models/query.py b/django/db/models/query.py index 46c6eec720..cd153a49dc 100644 --- a/django/db/models/query.py +++ b/django/db/models/query.py @@ -21,6 +21,7 @@ except NameError: # Used to control how many objects are worked with at once in some cases (e.g. # when deleting objects). 
CHUNK_SIZE = 100 +ITER_CHUNK_SIZE = CHUNK_SIZE class _QuerySet(object): "Represents a lazy database lookup for a set of objects" @@ -28,19 +29,40 @@ class _QuerySet(object): self.model = model self.query = query or sql.Query(self.model, connection) self._result_cache = None + self._iter = None ######################## # PYTHON MAGIC METHODS # ######################## def __repr__(self): - return repr(self._get_data()) + return repr(list(iter(self))) def __len__(self): - return len(self._get_data()) + return len(list(iter(self))) def __iter__(self): - return iter(self._get_data()) + pos = 0 + if self._result_cache is None: + self._iter = self.iterator() + self._result_cache = [] + while 1: + upper = len(self._result_cache) + while pos < upper: + yield self._result_cache[pos] + pos = pos + 1 + if not self._iter: + raise StopIteration + if len(self._result_cache) <= pos: + self._fill_cache() + + def __nonzero__(self): + if self._result_cache is None: + try: + iter(self).next() + except StopIteration: + return False + return True def __getitem__(self, k): "Retrieve an item or slice from the set of results." @@ -52,6 +74,15 @@ class _QuerySet(object): "Negative indexing is not supported." if self._result_cache is not None: + if self._iter is not None: + # The result cache has only been partially populated, so we may + # need to fill it out a bit more. + if isinstance(k, slice): + bound = k.stop + else: + bound = k + 1 + if len(self._result_cache) < bound: + self._fill_cache(bound - len(self._result_cache)) return self._result_cache[k] if isinstance(k, slice): @@ -375,10 +406,17 @@ class _QuerySet(object): c._setup_query() return c - def _get_data(self): - if self._result_cache is None: - self._result_cache = list(self.iterator()) - return self._result_cache + def _fill_cache(self, num=None): + """ + Fills the result cache with 'num' more entries (or until the results + iterator is exhausted). 
+ """ + if self._iter: + try: + for i in range(num or ITER_CHUNK_SIZE): + self._result_cache.append(self._iter.next()) + except StopIteration: + self._iter = None # Use the backend's QuerySet class if it defines one. Otherwise, use _QuerySet. if connection.features.uses_custom_queryset: @@ -395,6 +433,9 @@ class ValuesQuerySet(QuerySet): # QuerySet.clone() will also set up the _fields attribute with the # names of the model fields to select. + def __iter__(self): + return self.iterator() + def iterator(self): self.field_names.extend([f for f in self.query.extra_select.keys()]) for row in self.query.results_iter(): diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py index e1d7c4a258..67f2cc098d 100644 --- a/tests/regressiontests/queries/models.py +++ b/tests/regressiontests/queries/models.py @@ -501,5 +501,42 @@ Bug #6203 2 >>> len(Item.objects.dates('created', 'day')) 2 + +Test that parallel iterators work. + +>>> qs = Tag.objects.all() +>>> i1, i2 = iter(qs), iter(qs) +>>> i1.next(), i1.next() +(<Tag: t1>, <Tag: t2>) +>>> i2.next(), i2.next(), i2.next() +(<Tag: t1>, <Tag: t2>, <Tag: t3>) +>>> i1.next() +<Tag: t3> + +We can do slicing beyond what is currently in the result cache, too. + +# We need to mess with the implementation internals a bit here to decrease the +# cache fill size so that we don't read all the results at once. +>>> from django.db.models import query +>>> query.ITER_CHUNK_SIZE = 2 +>>> qs = Tag.objects.all() + +# Fill the cache with the first chunk. +>>> bool(qs) +True +>>> len(qs._result_cache) +2 + +# Query beyond the end of the cache and check that it is filled out as required. +>>> qs[4] +<Tag: t5> +>>> len(qs._result_cache) +5 + +# But querying beyond the end of the result set will fail. +>>> qs[100] +Traceback (most recent call last): +... +IndexError: ... """}