From d38a3169a426516623929ff8c2b2c9703d801b75 Mon Sep 17 00:00:00 2001
From: Grant Jenks
Date: Wed, 24 Jan 2018 09:26:19 -0800
Subject: [PATCH] Fixed #28977 -- Changed local-memory cache to use LRU culling.

LRU culling turns every read into a kind of write to the cache: cache keys
are moved to the first position in the OrderedDict when they are retrieved.
The RWLock which permitted multiple readers while prioritizing a single
writer is obsolete since all accesses are now writes.
---
 AUTHORS                              |  1 +
 django/core/cache/backends/locmem.py | 91 +++++++++++-----------------
 django/utils/synch.py                | 90 ---------------------------
 docs/releases/2.1.txt                |  3 +-
 docs/topics/cache.txt                |  6 ++
 tests/cache/tests.py                 | 50 ++++++++++++++-
 6 files changed, 94 insertions(+), 147 deletions(-)
 delete mode 100644 django/utils/synch.py

diff --git a/AUTHORS b/AUTHORS
index e106e231e4..2034ee4fcb 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -301,6 +301,7 @@ answer newbie questions, and generally made Django that much better:
     Gonzalo Saavedra
     Gopal Narayanan
     Graham Carlyle
+    Grant Jenks
     Greg Chapple
     Gregor Müllegger
     Grigory Fateyev
diff --git a/django/core/cache/backends/locmem.py b/django/core/cache/backends/locmem.py
index 69561fb735..cd610bbb73 100644
--- a/django/core/cache/backends/locmem.py
+++ b/django/core/cache/backends/locmem.py
@@ -1,10 +1,10 @@
 "Thread-safe in-memory cache backend."
 import pickle
 import time
-from contextlib import contextmanager
+from collections import OrderedDict
+from threading import Lock
 
 from django.core.cache.backends.base import DEFAULT_TIMEOUT, BaseCache
-from django.utils.synch import RWLock
 
 # Global in-memory store of cache data. Keyed by name, to provide
 # multiple named local memory caches.
@@ -13,88 +13,71 @@ _expire_info = {}
 _locks = {}
 
 
-@contextmanager
-def dummy():
-    """A context manager that does nothing special."""
-    yield
-
-
 class LocMemCache(BaseCache):
     def __init__(self, name, params):
         super().__init__(params)
-        self._cache = _caches.setdefault(name, {})
+        self._cache = _caches.setdefault(name, OrderedDict())
         self._expire_info = _expire_info.setdefault(name, {})
-        self._lock = _locks.setdefault(name, RWLock())
+        self._lock = _locks.setdefault(name, Lock())
 
     def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
         key = self.make_key(key, version=version)
         self.validate_key(key)
         pickled = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
-        with self._lock.writer():
+        with self._lock:
             if self._has_expired(key):
                 self._set(key, pickled, timeout)
                 return True
             return False
 
-    def get(self, key, default=None, version=None, acquire_lock=True):
+    def get(self, key, default=None, version=None):
         key = self.make_key(key, version=version)
         self.validate_key(key)
-        pickled = None
-        with (self._lock.reader() if acquire_lock else dummy()):
-            if not self._has_expired(key):
-                pickled = self._cache[key]
-        if pickled is not None:
-            try:
-                return pickle.loads(pickled)
-            except pickle.PickleError:
+        with self._lock:
+            if self._has_expired(key):
+                self._delete(key)
                 return default
-
-        with (self._lock.writer() if acquire_lock else dummy()):
-            try:
-                del self._cache[key]
-                del self._expire_info[key]
-            except KeyError:
-                pass
-            return default
+            pickled = self._cache[key]
+            self._cache.move_to_end(key, last=False)
+            return pickle.loads(pickled)
 
     def _set(self, key, value, timeout=DEFAULT_TIMEOUT):
         if len(self._cache) >= self._max_entries:
             self._cull()
         self._cache[key] = value
+        self._cache.move_to_end(key, last=False)
         self._expire_info[key] = self.get_backend_timeout(timeout)
 
     def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
         key = self.make_key(key, version=version)
         self.validate_key(key)
         pickled = pickle.dumps(value, pickle.HIGHEST_PROTOCOL)
-        with self._lock.writer():
+        with self._lock:
             self._set(key, pickled, timeout)
 
     def incr(self, key, delta=1, version=None):
-        with self._lock.writer():
-            value = self.get(key, version=version, acquire_lock=False)
-            if value is None:
+        key = self.make_key(key, version=version)
+        self.validate_key(key)
+        with self._lock:
+            if self._has_expired(key):
+                self._delete(key)
                 raise ValueError("Key '%s' not found" % key)
+            pickled = self._cache[key]
+            value = pickle.loads(pickled)
             new_value = value + delta
-            key = self.make_key(key, version=version)
             pickled = pickle.dumps(new_value, pickle.HIGHEST_PROTOCOL)
             self._cache[key] = pickled
+            self._cache.move_to_end(key, last=False)
         return new_value
 
     def has_key(self, key, version=None):
         key = self.make_key(key, version=version)
         self.validate_key(key)
-        with self._lock.reader():
-            if not self._has_expired(key):
-                return True
-
-        with self._lock.writer():
-            try:
-                del self._cache[key]
-                del self._expire_info[key]
-            except KeyError:
-                pass
-            return False
+        with self._lock:
+            if self._has_expired(key):
+                self._delete(key)
+                return False
+            return True
 
     def _has_expired(self, key):
         exp = self._expire_info.get(key, -1)
@@ -102,18 +85,17 @@ class LocMemCache(BaseCache):
 
     def _cull(self):
         if self._cull_frequency == 0:
-            self.clear()
+            self._cache.clear()
+            self._expire_info.clear()
         else:
-            doomed = [k for (i, k) in enumerate(self._cache) if i % self._cull_frequency == 0]
-            for k in doomed:
-                self._delete(k)
+            count = len(self._cache) // self._cull_frequency
+            for i in range(count):
+                key, _ = self._cache.popitem()
+                del self._expire_info[key]
 
     def _delete(self, key):
         try:
             del self._cache[key]
-        except KeyError:
-            pass
-        try:
             del self._expire_info[key]
         except KeyError:
             pass
@@ -121,9 +103,10 @@ class LocMemCache(BaseCache):
     def delete(self, key, version=None):
         key = self.make_key(key, version=version)
         self.validate_key(key)
-        with self._lock.writer():
+        with self._lock:
             self._delete(key)
 
     def clear(self):
-        self._cache.clear()
-        self._expire_info.clear()
+        with self._lock:
+            self._cache.clear()
+            self._expire_info.clear()
diff --git a/django/utils/synch.py b/django/utils/synch.py
deleted file mode 100644
index 23469869bd..0000000000
--- a/django/utils/synch.py
+++ /dev/null
@@ -1,90 +0,0 @@
-"""
-Synchronization primitives:
-
- - reader-writer lock (preference to writers)
-
-(Contributed to Django by eugene@lazutkin.com)
-"""
-
-import contextlib
-import threading
-
-
-class RWLock:
-    """
-    Classic implementation of reader-writer lock with preference to writers.
-
-    Readers can access a resource simultaneously.
-    Writers get an exclusive access.
-
-    API is self-descriptive:
-        reader_enters()
-        reader_leaves()
-        writer_enters()
-        writer_leaves()
-    """
-    def __init__(self):
-        self.mutex = threading.RLock()
-        self.can_read = threading.Semaphore(0)
-        self.can_write = threading.Semaphore(0)
-        self.active_readers = 0
-        self.active_writers = 0
-        self.waiting_readers = 0
-        self.waiting_writers = 0
-
-    def reader_enters(self):
-        with self.mutex:
-            if self.active_writers == 0 and self.waiting_writers == 0:
-                self.active_readers += 1
-                self.can_read.release()
-            else:
-                self.waiting_readers += 1
-        self.can_read.acquire()
-
-    def reader_leaves(self):
-        with self.mutex:
-            self.active_readers -= 1
-            if self.active_readers == 0 and self.waiting_writers != 0:
-                self.active_writers += 1
-                self.waiting_writers -= 1
-                self.can_write.release()
-
-    @contextlib.contextmanager
-    def reader(self):
-        self.reader_enters()
-        try:
-            yield
-        finally:
-            self.reader_leaves()
-
-    def writer_enters(self):
-        with self.mutex:
-            if self.active_writers == 0 and self.waiting_writers == 0 and self.active_readers == 0:
-                self.active_writers = 1
-                self.can_write.release()
-            else:
-                self.waiting_writers += 1
-        self.can_write.acquire()
-
-    def writer_leaves(self):
-        with self.mutex:
-            self.active_writers -= 1
-            if self.waiting_writers != 0:
-                self.active_writers += 1
-                self.waiting_writers -= 1
-                self.can_write.release()
-            elif self.waiting_readers != 0:
-                t = self.waiting_readers
-                self.waiting_readers = 0
-                self.active_readers += t
-                while t > 0:
-                    self.can_read.release()
-                    t -= 1
-
-    @contextlib.contextmanager
-    def writer(self):
-        self.writer_enters()
-        try:
-            yield
-        finally:
-            self.writer_leaves()
diff --git a/docs/releases/2.1.txt b/docs/releases/2.1.txt
index d7de90903c..24ab0faa50 100644
--- a/docs/releases/2.1.txt
+++ b/docs/releases/2.1.txt
@@ -108,7 +108,8 @@ Minor features
 Cache
 ~~~~~
 
-* ...
+* The :ref:`local-memory cache backend <local-memory-caching>` now uses a
+  least-recently-used (LRU) culling strategy rather than a pseudo-random one.
 
 CSRF
 ~~~~
diff --git a/docs/topics/cache.txt b/docs/topics/cache.txt
index d2cdf793f9..5d2376bebb 100644
--- a/docs/topics/cache.txt
+++ b/docs/topics/cache.txt
@@ -322,11 +322,17 @@ memory stores. If you only have one ``locmem`` cache, you can omit the
 memory cache, you will need to assign a name to at least one of them in
 order to keep them separate.
 
+The cache uses a least-recently-used (LRU) culling strategy.
+
 Note that each process will have its own private cache instance, which means no
 cross-process caching is possible. This obviously also means the local memory
 cache isn't particularly memory-efficient, so it's probably not a good choice
 for production environments. It's nice for development.
 
+.. versionchanged:: 2.1
+
+    Older versions use a pseudo-random culling strategy rather than LRU.
+
 Dummy caching (for development)
 -------------------------------
 
diff --git a/tests/cache/tests.py b/tests/cache/tests.py
index 63e0c8a02e..79c49edd7e 100644
--- a/tests/cache/tests.py
+++ b/tests/cache/tests.py
@@ -1084,11 +1084,16 @@ class PicklingSideEffect:
         self.locked = False
 
     def __getstate__(self):
-        if self.cache._lock.active_writers:
-            self.locked = True
+        self.locked = self.cache._lock.locked()
         return {}
 
 
+limit_locmem_entries = override_settings(CACHES=caches_setting_for_tests(
+    BACKEND='django.core.cache.backends.locmem.LocMemCache',
+    OPTIONS={'MAX_ENTRIES': 9},
+))
+
+
 @override_settings(CACHES=caches_setting_for_tests(
     BACKEND='django.core.cache.backends.locmem.LocMemCache',
 ))
@@ -1144,6 +1149,47 @@ class LocMemCacheTests(BaseCacheTests, TestCase):
             cache.decr(key)
         self.assertEqual(expire, cache._expire_info[_key])
 
+    @limit_locmem_entries
+    def test_lru_get(self):
+        """get() moves cache keys."""
+        for key in range(9):
+            cache.set(key, key, timeout=None)
+        for key in range(6):
+            self.assertEqual(cache.get(key), key)
+        cache.set(9, 9, timeout=None)
+        for key in range(6):
+            self.assertEqual(cache.get(key), key)
+        for key in range(6, 9):
+            self.assertIsNone(cache.get(key))
+        self.assertEqual(cache.get(9), 9)
+
+    @limit_locmem_entries
+    def test_lru_set(self):
+        """set() moves cache keys."""
+        for key in range(9):
+            cache.set(key, key, timeout=None)
+        for key in range(3, 9):
+            cache.set(key, key, timeout=None)
+        cache.set(9, 9, timeout=None)
+        for key in range(3, 10):
+            self.assertEqual(cache.get(key), key)
+        for key in range(3):
+            self.assertIsNone(cache.get(key))
+
+    @limit_locmem_entries
+    def test_lru_incr(self):
+        """incr() moves cache keys."""
+        for key in range(9):
+            cache.set(key, key, timeout=None)
+        for key in range(6):
+            cache.incr(key)
+        cache.set(9, 9, timeout=None)
+        for key in range(6):
+            self.assertEqual(cache.get(key), key + 1)
+        for key in range(6, 9):
+            self.assertIsNone(cache.get(key))
+        self.assertEqual(cache.get(9), 9)
+
 
 # memcached backend isn't guaranteed to be available.
 # To check the memcached backend, the test settings file will
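
For illustration only (not part of the patch): the commit message's point that LRU culling turns every read into a write can be seen in a minimal standalone model of the same OrderedDict bookkeeping. The class name LRUStore, its max_entries argument, and the sample keys are invented for this sketch; it omits the locking, pickling, timeouts, and CULL_FREQUENCY handling of the real backend.

    from collections import OrderedDict


    class LRUStore:
        """Illustrative only: most-recently-used keys are kept at the front
        of an OrderedDict, so culling pops from the back (the LRU end)."""

        def __init__(self, max_entries=3):
            self.max_entries = max_entries
            self.data = OrderedDict()

        def get(self, key, default=None):
            if key not in self.data:
                return default
            # Every read is also a write to the ordering: promote the key.
            self.data.move_to_end(key, last=False)
            return self.data[key]

        def set(self, key, value):
            if key not in self.data and len(self.data) >= self.max_entries:
                # popitem(last=True) removes the least recently used key.
                self.data.popitem(last=True)
            self.data[key] = value
            self.data.move_to_end(key, last=False)


    store = LRUStore(max_entries=3)
    for k in 'abc':
        store.set(k, k.upper())
    store.get('a')              # promotes 'a' to most recently used
    store.set('d', 'D')         # evicts 'b', the least recently used key
    print(list(store.data))     # ['d', 'a', 'c']

Because even get() reorders shared state under this scheme, the patched backend wraps every operation in a plain threading.Lock rather than the old reader-writer lock, and its _cull() removes a fraction of entries (controlled by CULL_FREQUENCY) from the LRU end in one pass.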