diff --git a/debian/control b/debian/control index 29e874a007f12ea1ac554881d0e98adea43d88e1..ad7be19f384e46ee77fdefabc14d5f3a594f1a3e 100644 --- a/debian/control +++ b/debian/control @@ -9,6 +9,7 @@ Build-Depends: debhelper (>= 9), python3-setuptools, python3-swh.core, python3-swh.storage, + python3-cachetools, python3-retrying, python3-vcversioner Standards-Version: 3.9.6 diff --git a/requirements.txt b/requirements.txt index 84ec4b42c96124486b25703cbcf4d7d6296fd3dc..420fe5e1a3171e8629c1146a5dd7c142798c7c7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ vcversioner swh-core swh-storage swh-model +cachetools diff --git a/swh/loader/vcs/cache.py b/swh/loader/vcs/cache.py index 4607c0e3c3681a9f77363c3ed47add8913a083e7..77b7b59e66ee4c69dc62217d638774cd7bfcdef7 100644 --- a/swh/loader/vcs/cache.py +++ b/swh/loader/vcs/cache.py @@ -3,58 +3,43 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from collections import deque +from cachetools.lfu import LFUCache -class SimpleCache(): - def __init__(self, max_size=10000, eviction_percent=0.2): - """Initialize cache of max_size elements. +class SimpleCache(LFUCache): + def __init__(self, maxsize=10000, eviction_percent=0.2): + """Initialize a cache of maxsize elements. - Args: + When the maxsize is hit, an eviction routine is triggered + to remove the least frequently used hit data. - - max_size: the max number of elements to cache. + Args: + - maxsize: the max number of elements to cache. - eviction_percent: Percent of elements to evict from cache - when max_size is reached. The eviction removes the first + when maxsize is reached. The eviction removes the lfu elements from the cache. """ - self.max_size = max_size + super().__init__(maxsize=maxsize) assert eviction_percent >= 0 and eviction_percent <= 1 - self.nb_elements_to_purge = int(max_size * eviction_percent) - self.s = set() - self.stack = deque([], maxlen=max_size) - self.count = 0 - - def __str__(self): - return ('set: %s, stack: %s, count: %s, max-size: %s, nb-purge: %s' % ( - self.s, - self.stack, - self.count, - self.max_size, - self.nb_elements_to_purge)) + self.nb_elements_to_purge = int(maxsize * eviction_percent) def _evict(self): """Remove self.nb_elements_to_purge from cache. """ - elems_to_remove = set() - for x in range(0, self.nb_elements_to_purge): - e = self.stack.popleft() - elems_to_remove.add(e) - self.s = self.s - elems_to_remove - self.count = self.count - self.nb_elements_to_purge + for _ in range(0, self.nb_elements_to_purge): + self.popitem() def add(self, e): - if e not in self.s: - self.s.add(e) - self.stack.append(e) - self.count += 1 - - if self.count >= self.max_size: - self._evict() - - def set(self): - return self.s + if self.currsize+1 >= self.maxsize: + self._evict() + super().__setitem__(key=e, value=e) def __contains__(self, e): - return e in self.s + try: + self.__getitem__(e) + except: + return False + else: + return True diff --git a/swh/loader/vcs/tests/test_cache.py b/swh/loader/vcs/tests/test_cache.py index b286cdab5fda80595f86a5a8f31971c7ce3832a9..a05c73a2a535d1eae952f4d8b915fdf4c71f643f 100644 --- a/swh/loader/vcs/tests/test_cache.py +++ b/swh/loader/vcs/tests/test_cache.py @@ -15,53 +15,65 @@ class TestSimpleCache(unittest.TestCase): @istest def simple_cache_behavior_fails_to_init(self): try: - SimpleCache(max_size=6, eviction_percent=10) + SimpleCache(maxsize=6, eviction_percent=10) except AssertionError: self.assertTrue(True) @istest def simple_cache_behavior(self): # given - cache = SimpleCache(max_size=6, eviction_percent=0.5) + cache = SimpleCache(maxsize=6, eviction_percent=0.5) cache.add(3) cache.add(2) cache.add(1) - cache.add(1) # duplicate elements are dismissed # when - self.assertEquals(cache.set(), {1, 2, 3}) self.assertTrue(1 in cache) self.assertTrue(2 in cache) self.assertTrue(3 in cache) - self.assertTrue(4 not in cache) - self.assertEquals(cache.count, 3) + + self.assertFalse(4 in cache) cache.add(4) cache.add(5) - self.assertEquals(cache.set(), {1, 2, 3, 4, 5}) self.assertTrue(1 in cache) self.assertTrue(2 in cache) self.assertTrue(3 in cache) self.assertTrue(4 in cache) self.assertTrue(5 in cache) - self.assertEquals(cache.count, 5) - cache.add(6) # we hit max-size, 50% of elements (here 3) are evicted + self.assertFalse(6 in cache) + + self.assertEquals(cache.__getitem__(4), 4) # increment their use + self.assertEquals(cache.__getitem__(5), 5) # increment their use + + cache.add(4) + cache.add(4) # increment their use + cache.add(5) + cache.add(5) # increment their use + cache.add(6) # we hit maxsize - self.assertEquals(cache.set(), {4, 5, 6}) self.assertTrue(4 in cache) self.assertTrue(5 in cache) self.assertTrue(6 in cache) - self.assertTrue(1 not in cache) - self.assertTrue(2 not in cache) - self.assertTrue(3 not in cache) - self.assertEquals(cache.count, 3) + + # stat on counts (each in action and get action increments use with 1): + # 1: 3 + # 2: 3 + # 3: 3 + # 4: 5 + # 5: 5 + # 6: 1 # 6 is inserted after eviction. Else it could never be inserted + + # we hit the max size of 6 so 50% of data (3) will be removed. + # As 1, 2, 3 are the least frequently used so they are the ones evicted + self.assertFalse(1 in cache) + self.assertFalse(2 in cache) + self.assertFalse(3 in cache) cache.add(7) cache.add(8) - self.assertEquals(cache.set(), {4, 5, 6, 7, 8}) self.assertTrue(7 in cache) self.assertTrue(8 in cache) - self.assertEquals(cache.count, 5)