From 541515c5b44f429c9c1303de9eea0dca8288b50f Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Sun, 10 Apr 2016 11:44:31 +0200
Subject: [PATCH] Add LFUCache behavior to cache

---
 debian/control                     |  1 +
 requirements.txt                   |  1 +
 swh/loader/vcs/cache.py            | 59 +++++++++++-------------------
 swh/loader/vcs/tests/test_cache.py | 44 ++++++++++++++--------
 4 files changed, 52 insertions(+), 53 deletions(-)

diff --git a/debian/control b/debian/control
index 29e874a0..ad7be19f 100644
--- a/debian/control
+++ b/debian/control
@@ -9,6 +9,7 @@ Build-Depends: debhelper (>= 9),
                python3-setuptools,
                python3-swh.core,
                python3-swh.storage,
+               python3-cachetools,
                python3-retrying,
                python3-vcversioner
 Standards-Version: 3.9.6
diff --git a/requirements.txt b/requirements.txt
index 84ec4b42..420fe5e1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ vcversioner
 swh-core
 swh-storage
 swh-model
+cachetools
diff --git a/swh/loader/vcs/cache.py b/swh/loader/vcs/cache.py
index 4607c0e3..77b7b59e 100644
--- a/swh/loader/vcs/cache.py
+++ b/swh/loader/vcs/cache.py
@@ -3,58 +3,43 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from collections import deque
+from cachetools.lfu import LFUCache
 
 
-class SimpleCache():
-    def __init__(self, max_size=10000, eviction_percent=0.2):
-        """Initialize cache of max_size elements.
+class SimpleCache(LFUCache):
+    def __init__(self, maxsize=10000, eviction_percent=0.2):
+        """Initialize a cache of maxsize elements.
 
-           Args:
+           When maxsize is reached, an eviction routine is triggered
+           to remove the least frequently used entries.
 
-           - max_size: the max number of elements to cache.
+           Args:
+           - maxsize: the max number of elements to cache.
            - eviction_percent: Percent of elements to evict from cache
-           when max_size is reached. The eviction removes the first
+           when maxsize is reached. The eviction removes the lfu
            elements from the cache.
 
         """
-        self.max_size = max_size
+        super().__init__(maxsize=maxsize)
         assert eviction_percent >= 0 and eviction_percent <= 1
-        self.nb_elements_to_purge = int(max_size * eviction_percent)
-        self.s = set()
-        self.stack = deque([], maxlen=max_size)
-        self.count = 0
-
-    def __str__(self):
-        return ('set: %s, stack: %s, count: %s, max-size: %s, nb-purge: %s' % (
-            self.s,
-            self.stack,
-            self.count,
-            self.max_size,
-            self.nb_elements_to_purge))
+        self.nb_elements_to_purge = int(maxsize * eviction_percent)
 
     def _evict(self):
         """Remove self.nb_elements_to_purge from cache.

         """
-        elems_to_remove = set()
-        for x in range(0, self.nb_elements_to_purge):
-            e = self.stack.popleft()
-            elems_to_remove.add(e)
-        self.s = self.s - elems_to_remove
-        self.count = self.count - self.nb_elements_to_purge
+        for _ in range(min(self.nb_elements_to_purge, len(self))):
+            self.popitem()  # drops the least frequently used entry
 
     def add(self, e):
-        if e not in self.s:
-            self.s.add(e)
-            self.stack.append(e)
-            self.count += 1
-
-            if self.count >= self.max_size:
-                self._evict()
-
-    def set(self):
-        return self.s
+        if self.currsize+1 >= self.maxsize:
+            self._evict()
+        super().__setitem__(key=e, value=e)
 
     def __contains__(self, e):
-        return e in self.s
+        try:
+            self.__getitem__(e)  # a lookup also counts as a use of e
+        except KeyError:  # only a missing key means "not cached"
+            return False
+        else:
+            return True
diff --git a/swh/loader/vcs/tests/test_cache.py b/swh/loader/vcs/tests/test_cache.py
index b286cdab..a05c73a2 100644
--- a/swh/loader/vcs/tests/test_cache.py
+++ b/swh/loader/vcs/tests/test_cache.py
@@ -15,53 +15,65 @@ class TestSimpleCache(unittest.TestCase):
     @istest
     def simple_cache_behavior_fails_to_init(self):
         try:
-            SimpleCache(max_size=6, eviction_percent=10)
+            SimpleCache(maxsize=6, eviction_percent=10)
         except AssertionError:
             self.assertTrue(True)
 
     @istest
     def simple_cache_behavior(self):
         # given
-        cache = SimpleCache(max_size=6, eviction_percent=0.5)
+        cache = SimpleCache(maxsize=6, eviction_percent=0.5)
 
         cache.add(3)
         cache.add(2)
         cache.add(1)
-        cache.add(1)  # duplicate elements are dismissed
 
         # when
-        self.assertEquals(cache.set(), {1, 2, 3})
         self.assertTrue(1 in cache)
         self.assertTrue(2 in cache)
         self.assertTrue(3 in cache)
-        self.assertTrue(4 not in cache)
-        self.assertEquals(cache.count, 3)
+
+        self.assertFalse(4 in cache)
 
         cache.add(4)
         cache.add(5)
 
-        self.assertEquals(cache.set(), {1, 2, 3, 4, 5})
         self.assertTrue(1 in cache)
         self.assertTrue(2 in cache)
         self.assertTrue(3 in cache)
         self.assertTrue(4 in cache)
         self.assertTrue(5 in cache)
-        self.assertEquals(cache.count, 5)
 
-        cache.add(6)  # we hit max-size, 50% of elements (here 3) are evicted
+        self.assertFalse(6 in cache)
+
+        self.assertEquals(cache.__getitem__(4), 4)  # increment their use
+        self.assertEquals(cache.__getitem__(5), 5)  # increment their use
+
+        cache.add(4)
+        cache.add(4)   # increment their use
+        cache.add(5)
+        cache.add(5)   # increment their use
+        cache.add(6)   # we hit maxsize
 
-        self.assertEquals(cache.set(), {4, 5, 6})
         self.assertTrue(4 in cache)
         self.assertTrue(5 in cache)
         self.assertTrue(6 in cache)
-        self.assertTrue(1 not in cache)
-        self.assertTrue(2 not in cache)
-        self.assertTrue(3 not in cache)
-        self.assertEquals(cache.count, 3)
+
+        # use counts (each `in` check and each get increments the count by 1):
+        # 1: 3
+        # 2: 3
+        # 3: 3
+        # 4: 5
+        # 5: 5
+        # 6: 1  # 6 is inserted after eviction. Else it could never be inserted
+
+        # we hit the max size of 6 so 50% of data (3) will be removed.
+        # 1, 2, 3 are the least frequently used, so they are the ones evicted
+        self.assertFalse(1 in cache)
+        self.assertFalse(2 in cache)
+        self.assertFalse(3 in cache)
 
         cache.add(7)
         cache.add(8)
-        self.assertEquals(cache.set(), {4, 5, 6, 7, 8})
         self.assertTrue(7 in cache)
         self.assertTrue(8 in cache)
-        self.assertEquals(cache.count, 5)
-- 
GitLab