Mirror of https://github.com/django/django.git (synced 2025-07-05 18:29:11 +00:00)
queryset-refactor: Converted the queryset iterator into a real iterator that populates the result cache only on demand. For efficiency, the cache is filled 100 elements at a time rather than one at a time; this is a real win when the result set contains, say, 10,000 objects. This also provides an efficient boolean (__nonzero__) test that doesn't use up a lot of memory if you never read all the results. Refs #2430, #5987.

git-svn-id: http://code.djangoproject.com/svn/django/branches/queryset-refactor@7030 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
parent 98abf27535
commit dd2251a653
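Before the diff, here is a minimal standalone sketch of the caching pattern the patch introduces, assuming a plain in-memory iterator in place of a database cursor. The class name ChunkedResults and its produce_rows callable are illustrative inventions, not Django API, and the Python 3 spellings (__bool__, next()) stand in for the patch's Python 2 idioms (__nonzero__, .next()):

# A standalone model of the on-demand result cache this patch introduces.
# Not Django code: the "database" is a plain iterator supplied by the caller.

ITER_CHUNK_SIZE = 100

class ChunkedResults(object):
    def __init__(self, produce_rows):
        self._produce_rows = produce_rows  # zero-arg callable returning an iterator
        self._result_cache = None          # None means "no query has run yet"
        self._iter = None                  # live source iterator while results remain

    def __iter__(self):
        pos = 0
        if self._result_cache is None:
            self._iter = self._produce_rows()
            self._result_cache = []
        while True:
            # Serve everything already cached; re-check len() on each pass so
            # parallel iterators over the same object see each other's fills.
            while pos < len(self._result_cache):
                yield self._result_cache[pos]
                pos += 1
            if self._iter is None:
                return                     # source exhausted, cache is complete
            self._fill_cache()

    def __bool__(self):
        # A truth test touches at most one chunk instead of the full result set.
        if self._result_cache is None:
            try:
                next(iter(self))
            except StopIteration:
                return False
        return True

    def _fill_cache(self, num=None):
        # Append up to 'num' (default: one chunk of) entries to the cache.
        if self._iter:
            try:
                for _ in range(num or ITER_CHUNK_SIZE):
                    self._result_cache.append(next(self._iter))
            except StopIteration:
                self._iter = None

results = ChunkedResults(lambda: iter(range(250)))
assert bool(results)                       # fetches only the first chunk
assert len(results._result_cache) == ITER_CHUNK_SIZE
assert list(results) == list(range(250))   # drains the rest on demand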
@@ -21,6 +21,7 @@ except NameError:
 # Used to control how many objects are worked with at once in some cases (e.g.
 # when deleting objects).
 CHUNK_SIZE = 100
+ITER_CHUNK_SIZE = CHUNK_SIZE
 
 class _QuerySet(object):
     "Represents a lazy database lookup for a set of objects"
@@ -28,19 +29,40 @@ class _QuerySet(object):
         self.model = model
         self.query = query or sql.Query(self.model, connection)
         self._result_cache = None
+        self._iter = None
 
     ########################
     # PYTHON MAGIC METHODS #
     ########################
 
     def __repr__(self):
-        return repr(self._get_data())
+        return repr(list(iter(self)))
 
     def __len__(self):
-        return len(self._get_data())
+        return len(list(iter(self)))
 
     def __iter__(self):
-        return iter(self._get_data())
+        pos = 0
+        if self._result_cache is None:
+            self._iter = self.iterator()
+            self._result_cache = []
+        while 1:
+            upper = len(self._result_cache)
+            while pos < upper:
+                yield self._result_cache[pos]
+                pos = pos + 1
+            if not self._iter:
+                raise StopIteration
+            if len(self._result_cache) <= pos:
+                self._fill_cache()
+
+    def __nonzero__(self):
+        if self._result_cache is None:
+            try:
+                iter(self).next()
+            except StopIteration:
+                return False
+        return True
 
     def __getitem__(self, k):
         "Retrieve an item or slice from the set of results."
@@ -52,6 +74,15 @@ class _QuerySet(object):
             "Negative indexing is not supported."
 
+        if self._result_cache is not None:
+            if self._iter is not None:
+                # The result cache has only been partially populated, so we may
+                # need to fill it out a bit more.
+                if isinstance(k, slice):
+                    bound = k.stop
+                else:
+                    bound = k + 1
+                if len(self._result_cache) < bound:
+                    self._fill_cache(bound - len(self._result_cache))
+            return self._result_cache[k]
+
         if isinstance(k, slice):
@@ -375,10 +406,17 @@ class _QuerySet(object):
         c._setup_query()
         return c
 
-    def _get_data(self):
-        if self._result_cache is None:
-            self._result_cache = list(self.iterator())
-        return self._result_cache
+    def _fill_cache(self, num=None):
+        """
+        Fills the result cache with 'num' more entries (or until the results
+        iterator is exhausted).
+        """
+        if self._iter:
+            try:
+                for i in range(num or ITER_CHUNK_SIZE):
+                    self._result_cache.append(self._iter.next())
+            except StopIteration:
+                self._iter = None
 
 # Use the backend's QuerySet class if it defines one. Otherwise, use _QuerySet.
 if connection.features.uses_custom_queryset:
@@ -395,6 +433,9 @@ class ValuesQuerySet(QuerySet):
     # QuerySet.clone() will also set up the _fields attribute with the
     # names of the model fields to select.
 
+    def __iter__(self):
+        return self.iterator()
+
     def iterator(self):
         self.field_names.extend([f for f in self.query.extra_select.keys()])
         for row in self.query.results_iter():
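One more illustration before the test changes: the new __getitem__ branch fills the cache just far enough to satisfy an index or bounded slice, rather than reading everything. A sketch of the same behavior on the standalone model above (the getitem helper is hypothetical, written only to mirror the patch's branch, and assumes a bounded index or slice):

# Hypothetical getitem helper mirroring the __getitem__ branch added above;
# it tops up the ChunkedResults cache just far enough to satisfy the request.
def getitem(results, k):
    if results._result_cache is None:
        # No query has run yet: start the source and an empty cache.
        results._iter = results._produce_rows()
        results._result_cache = []
    if results._iter is not None:
        # Work out how many entries the request needs, then fill exactly that.
        bound = k.stop if isinstance(k, slice) else k + 1
        if len(results._result_cache) < bound:
            results._fill_cache(bound - len(results._result_cache))
    return results._result_cache[k]

tags = ChunkedResults(lambda: iter(['t1', 't2', 't3', 't4', 't5']))
assert getitem(tags, 4) == 't5'
assert len(tags._result_cache) == 5   # filled exactly as far as needed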
@@ -501,5 +501,42 @@ Bug #6203
 2
 >>> len(Item.objects.dates('created', 'day'))
 2
+
+Test that parallel iterators work.
+
+>>> qs = Tag.objects.all()
+>>> i1, i2 = iter(qs), iter(qs)
+>>> i1.next(), i1.next()
+(<Tag: t1>, <Tag: t2>)
+>>> i2.next(), i2.next(), i2.next()
+(<Tag: t1>, <Tag: t2>, <Tag: t3>)
+>>> i1.next()
+<Tag: t3>
+
+We can do slicing beyond what is currently in the result cache, too.
+
+# We need to mess with the implementation internals a bit here to decrease the
+# cache fill size so that we don't read all the results at once.
+>>> from django.db.models import query
+>>> query.ITER_CHUNK_SIZE = 2
+>>> qs = Tag.objects.all()
+
+# Fill the cache with the first chunk.
+>>> bool(qs)
+True
+>>> len(qs._result_cache)
+2
+
+# Query beyond the end of the cache and check that it is filled out as required.
+>>> qs[4]
+<Tag: t5>
+>>> len(qs._result_cache)
+5
+
+# But querying beyond the end of the result set will fail.
+>>> qs[100]
+Traceback (most recent call last):
+...
+IndexError: ...
 """}