From 541515c5b44f429c9c1303de9eea0dca8288b50f Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com> Date: Sun, 10 Apr 2016 11:44:31 +0200 Subject: [PATCH] Add LFUCache behavior to cache --- debian/control | 1 + requirements.txt | 1 + swh/loader/vcs/cache.py | 59 +++++++++++------------------- swh/loader/vcs/tests/test_cache.py | 44 ++++++++++++++-------- 4 files changed, 52 insertions(+), 53 deletions(-) diff --git a/debian/control b/debian/control index 29e874a0..ad7be19f 100644 --- a/debian/control +++ b/debian/control @@ -9,6 +9,7 @@ Build-Depends: debhelper (>= 9), python3-setuptools, python3-swh.core, python3-swh.storage, + python3-cachetools, python3-retrying, python3-vcversioner Standards-Version: 3.9.6 diff --git a/requirements.txt b/requirements.txt index 84ec4b42..420fe5e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ vcversioner swh-core swh-storage swh-model +cachetools diff --git a/swh/loader/vcs/cache.py b/swh/loader/vcs/cache.py index 4607c0e3..77b7b59e 100644 --- a/swh/loader/vcs/cache.py +++ b/swh/loader/vcs/cache.py @@ -3,58 +3,43 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from collections import deque +from cachetools.lfu import LFUCache -class SimpleCache(): - def __init__(self, max_size=10000, eviction_percent=0.2): - """Initialize cache of max_size elements. +class SimpleCache(LFUCache): + def __init__(self, maxsize=10000, eviction_percent=0.2): + """Initialize a cache of maxsize elements. - Args: + When the maxsize is hit, an eviction routine is triggered + to remove the least frequently used hit data. - - max_size: the max number of elements to cache. + Args: + - maxsize: the max number of elements to cache. - eviction_percent: Percent of elements to evict from cache - when max_size is reached. The eviction removes the first + when maxsize is reached. The eviction removes the lfu elements from the cache. """ - self.max_size = max_size + super().__init__(maxsize=maxsize) assert eviction_percent >= 0 and eviction_percent <= 1 - self.nb_elements_to_purge = int(max_size * eviction_percent) - self.s = set() - self.stack = deque([], maxlen=max_size) - self.count = 0 - - def __str__(self): - return ('set: %s, stack: %s, count: %s, max-size: %s, nb-purge: %s' % ( - self.s, - self.stack, - self.count, - self.max_size, - self.nb_elements_to_purge)) + self.nb_elements_to_purge = int(maxsize * eviction_percent) def _evict(self): """Remove self.nb_elements_to_purge from cache. """ - elems_to_remove = set() - for x in range(0, self.nb_elements_to_purge): - e = self.stack.popleft() - elems_to_remove.add(e) - self.s = self.s - elems_to_remove - self.count = self.count - self.nb_elements_to_purge + for _ in range(0, self.nb_elements_to_purge): + self.popitem() def add(self, e): - if e not in self.s: - self.s.add(e) - self.stack.append(e) - self.count += 1 - - if self.count >= self.max_size: - self._evict() - - def set(self): - return self.s + if self.currsize+1 >= self.maxsize: + self._evict() + super().__setitem__(key=e, value=e) def __contains__(self, e): - return e in self.s + try: + self.__getitem__(e) + except: + return False + else: + return True diff --git a/swh/loader/vcs/tests/test_cache.py b/swh/loader/vcs/tests/test_cache.py index b286cdab..a05c73a2 100644 --- a/swh/loader/vcs/tests/test_cache.py +++ b/swh/loader/vcs/tests/test_cache.py @@ -15,53 +15,65 @@ class TestSimpleCache(unittest.TestCase): @istest def simple_cache_behavior_fails_to_init(self): try: - SimpleCache(max_size=6, eviction_percent=10) + SimpleCache(maxsize=6, eviction_percent=10) except AssertionError: self.assertTrue(True) @istest def simple_cache_behavior(self): # given - cache = SimpleCache(max_size=6, eviction_percent=0.5) + cache = SimpleCache(maxsize=6, eviction_percent=0.5) cache.add(3) cache.add(2) cache.add(1) - cache.add(1) # duplicate elements are dismissed # when - self.assertEquals(cache.set(), {1, 2, 3}) self.assertTrue(1 in cache) self.assertTrue(2 in cache) self.assertTrue(3 in cache) - self.assertTrue(4 not in cache) - self.assertEquals(cache.count, 3) + + self.assertFalse(4 in cache) cache.add(4) cache.add(5) - self.assertEquals(cache.set(), {1, 2, 3, 4, 5}) self.assertTrue(1 in cache) self.assertTrue(2 in cache) self.assertTrue(3 in cache) self.assertTrue(4 in cache) self.assertTrue(5 in cache) - self.assertEquals(cache.count, 5) - cache.add(6) # we hit max-size, 50% of elements (here 3) are evicted + self.assertFalse(6 in cache) + + self.assertEquals(cache.__getitem__(4), 4) # increment their use + self.assertEquals(cache.__getitem__(5), 5) # increment their use + + cache.add(4) + cache.add(4) # increment their use + cache.add(5) + cache.add(5) # increment their use + cache.add(6) # we hit maxsize - self.assertEquals(cache.set(), {4, 5, 6}) self.assertTrue(4 in cache) self.assertTrue(5 in cache) self.assertTrue(6 in cache) - self.assertTrue(1 not in cache) - self.assertTrue(2 not in cache) - self.assertTrue(3 not in cache) - self.assertEquals(cache.count, 3) + + # stat on counts (each in action and get action increments use with 1): + # 1: 3 + # 2: 3 + # 3: 3 + # 4: 5 + # 5: 5 + # 6: 1 # 6 is inserted after eviction. Else it could never be inserted + + # we hit the max size of 6 so 50% of data (3) will be removed. + # As 1, 2, 3 are the least frequently used so they are the ones evicted + self.assertFalse(1 in cache) + self.assertFalse(2 in cache) + self.assertFalse(3 in cache) cache.add(7) cache.add(8) - self.assertEquals(cache.set(), {4, 5, 6, 7, 8}) self.assertTrue(7 in cache) self.assertTrue(8 in cache) - self.assertEquals(cache.count, 5) -- GitLab