Skip to content
Snippets Groups Projects
Verified Commit 541515c5 authored by Antoine R. Dumont
Browse files

Add LFUCache behavior to cache

parent 54d0afd0
Branches with-lfu-cache
No related tags found
No related merge requests found
......@@ -9,6 +9,7 @@ Build-Depends: debhelper (>= 9),
python3-setuptools,
python3-swh.core,
python3-swh.storage,
python3-cachetools,
python3-retrying,
python3-vcversioner
Standards-Version: 3.9.6
......
......@@ -5,3 +5,4 @@ vcversioner
swh-core
swh-storage
swh-model
cachetools
......@@ -3,58 +3,43 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from collections import deque
from cachetools.lfu import LFUCache
class SimpleCache():
def __init__(self, max_size=10000, eviction_percent=0.2):
"""Initialize cache of max_size elements.
class SimpleCache(LFUCache):
def __init__(self, maxsize=10000, eviction_percent=0.2):
"""Initialize a cache of maxsize elements.
Args:
When the maxsize is hit, an eviction routine is triggered
to remove the least frequently used hit data.
- max_size: the max number of elements to cache.
Args:
- maxsize: the max number of elements to cache.
- eviction_percent: Percent of elements to evict from cache
when max_size is reached. The eviction removes the first
when maxsize is reached. The eviction removes the lfu
elements from the cache.
"""
self.max_size = max_size
super().__init__(maxsize=maxsize)
assert eviction_percent >= 0 and eviction_percent <= 1
self.nb_elements_to_purge = int(max_size * eviction_percent)
self.s = set()
self.stack = deque([], maxlen=max_size)
self.count = 0
def __str__(self):
return ('set: %s, stack: %s, count: %s, max-size: %s, nb-purge: %s' % (
self.s,
self.stack,
self.count,
self.max_size,
self.nb_elements_to_purge))
self.nb_elements_to_purge = int(maxsize * eviction_percent)
def _evict(self):
"""Remove self.nb_elements_to_purge from cache.
"""
elems_to_remove = set()
for x in range(0, self.nb_elements_to_purge):
e = self.stack.popleft()
elems_to_remove.add(e)
self.s = self.s - elems_to_remove
self.count = self.count - self.nb_elements_to_purge
for _ in range(0, self.nb_elements_to_purge):
self.popitem()
def add(self, e):
if e not in self.s:
self.s.add(e)
self.stack.append(e)
self.count += 1
if self.count >= self.max_size:
self._evict()
def set(self):
return self.s
if self.currsize+1 >= self.maxsize:
self._evict()
super().__setitem__(key=e, value=e)
def __contains__(self, e):
return e in self.s
try:
self.__getitem__(e)
except:
return False
else:
return True
......@@ -15,53 +15,65 @@ class TestSimpleCache(unittest.TestCase):
@istest
def simple_cache_behavior_fails_to_init(self):
try:
SimpleCache(max_size=6, eviction_percent=10)
SimpleCache(maxsize=6, eviction_percent=10)
except AssertionError:
self.assertTrue(True)
@istest
def simple_cache_behavior(self):
# given
cache = SimpleCache(max_size=6, eviction_percent=0.5)
cache = SimpleCache(maxsize=6, eviction_percent=0.5)
cache.add(3)
cache.add(2)
cache.add(1)
cache.add(1) # duplicate elements are dismissed
# when
self.assertEquals(cache.set(), {1, 2, 3})
self.assertTrue(1 in cache)
self.assertTrue(2 in cache)
self.assertTrue(3 in cache)
self.assertTrue(4 not in cache)
self.assertEquals(cache.count, 3)
self.assertFalse(4 in cache)
cache.add(4)
cache.add(5)
self.assertEquals(cache.set(), {1, 2, 3, 4, 5})
self.assertTrue(1 in cache)
self.assertTrue(2 in cache)
self.assertTrue(3 in cache)
self.assertTrue(4 in cache)
self.assertTrue(5 in cache)
self.assertEquals(cache.count, 5)
cache.add(6) # we hit max-size, 50% of elements (here 3) are evicted
self.assertFalse(6 in cache)
self.assertEquals(cache.__getitem__(4), 4) # increment their use
self.assertEquals(cache.__getitem__(5), 5) # increment their use
cache.add(4)
cache.add(4) # increment their use
cache.add(5)
cache.add(5) # increment their use
cache.add(6) # we hit maxsize
self.assertEquals(cache.set(), {4, 5, 6})
self.assertTrue(4 in cache)
self.assertTrue(5 in cache)
self.assertTrue(6 in cache)
self.assertTrue(1 not in cache)
self.assertTrue(2 not in cache)
self.assertTrue(3 not in cache)
self.assertEquals(cache.count, 3)
# usage counts (each `in` check and each get increments the use count by 1):
# 1: 3
# 2: 3
# 3: 3
# 4: 5
# 5: 5
# 6: 1 # 6 is inserted after eviction. Else it could never be inserted
# We hit the max size of 6, so 50% of the data (3 elements) is removed.
# 1, 2 and 3 are the least frequently used, so they are the ones evicted.
self.assertFalse(1 in cache)
self.assertFalse(2 in cache)
self.assertFalse(3 in cache)
cache.add(7)
cache.add(8)
self.assertEquals(cache.set(), {4, 5, 6, 7, 8})
self.assertTrue(7 in cache)
self.assertTrue(8 in cache)
self.assertEquals(cache.count, 5)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment