From e5e594358b078b4326b2af462fab0649676d5924 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Tue, 14 Apr 2020 10:54:14 +0200
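Subject: [PATCH] storage.filter: Remove internal state

Drop the `objects_seen` dict from FilteringProxyStorage. It was declared
as a class attribute, so the per-object-type sets of already-seen ids
were reachable from every instance of the proxy.

The content, skipped_content and id filtering helpers no longer skip ids
seen in earlier calls; every id they receive is now passed to the
underlying storage's "missing" lookup.

The proxy's own `clear_buffers` and `flush` overrides only reset or
bypassed that state, so they are removed, along with the test that
checked `objects_seen` bookkeeping.
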
---
 swh/storage/filter.py            | 32 ++----------------
 swh/storage/tests/test_filter.py | 57 --------------------------------
 2 files changed, 2 insertions(+), 87 deletions(-)

diff --git a/swh/storage/filter.py b/swh/storage/filter.py
index 692e224b3..14d9882d9 100644
--- a/swh/storage/filter.py
+++ b/swh/storage/filter.py
@@ -4,7 +4,7 @@
 # See top-level LICENSE file for more information
 
 
-from typing import Dict, Iterable, Optional, Set
+from typing import Dict, Iterable, Set
 
 from swh.model.model import (
     Content,
@@ -33,12 +33,9 @@ class FilteringProxyStorage:
     """
 
     object_types = ["content", "skipped_content", "directory", "revision"]
-    objects_seen: Dict[str, Set[bytes]] = {}
 
     def __init__(self, storage):
         self.storage = get_storage(**storage)
-        for object_type in self.object_types:
-            self.objects_seen[object_type] = set()
 
     def __getattr__(self, key):
         if key == "storage":
@@ -77,12 +74,8 @@ class FilteringProxyStorage:
                 storage
 
         """
-        objects_seen = self.objects_seen["content"]
         missing_contents = []
         for content in contents:
-            if content.sha256 in objects_seen:
-                continue
-            objects_seen.add(content.sha256)
             missing_contents.append(content.hashes())
 
         return set(self.storage.content_missing(missing_contents, key_hash="sha256",))
@@ -97,12 +90,10 @@ class FilteringProxyStorage:
                 storage
 
         """
-        objects_seen = self.objects_seen["skipped_content"]
         missing_contents = []
         for content in contents:
-            if content.sha1_git is None or content.sha1_git in objects_seen:
+            if content.sha1_git is None:
                 continue
-            objects_seen.add(content.sha1_git)
             missing_contents.append(content.hashes())
 
         return {
@@ -121,12 +112,8 @@ class FilteringProxyStorage:
             Missing ids from the storage for object_type
 
         """
-        objects_seen = self.objects_seen[object_type]
         missing_ids = []
         for id in ids:
-            if id in objects_seen:
-                continue
-            objects_seen.add(id)
             missing_ids.append(id)
 
         fn_by_object_type = {
@@ -136,18 +123,3 @@ class FilteringProxyStorage:
 
         fn = fn_by_object_type[object_type]
         return set(fn(missing_ids))
-
-    def clear_buffers(self, object_types: Optional[Iterable[str]] = None) -> None:
-        """Clear objects from current buffer
-
-        """
-        if object_types is None:
-            object_types = self.object_types
-
-        for object_type in object_types:
-            self.objects_seen[object_type] = set()
-
-        return self.storage.clear_buffers(object_types)
-
-    def flush(self, object_types: Optional[Iterable[str]] = None) -> Dict:
-        return self.storage.flush(object_types)
diff --git a/swh/storage/tests/test_filter.py b/swh/storage/tests/test_filter.py
index 021873677..6043decbe 100644
--- a/swh/storage/tests/test_filter.py
+++ b/swh/storage/tests/test_filter.py
@@ -125,60 +125,3 @@ def test_filtering_proxy_storage_directory(swh_storage, sample_data):
     assert s == {
         "directory:add": 0,
     }
-
-
-def test_filtering_proxy_storage_clear(swh_storage, sample_data):
-    """Clear operation on filter proxy
-
-    """
-    threshold = 10
-    contents = sample_data["content"]
-    assert 0 < len(contents) < threshold
-    skipped_contents = sample_data["skipped_content"]
-    assert 0 < len(skipped_contents) < threshold
-    directories = sample_data["directory"]
-    assert 0 < len(directories) < threshold
-    revisions = sample_data["revision"]
-    assert 0 < len(revisions) < threshold
-    releases = sample_data["release"]
-    assert 0 < len(releases) < threshold
-
-    s = swh_storage.content_add(contents)
-    assert s["content:add"] == len(contents)
-    s = swh_storage.skipped_content_add(skipped_contents)
-    assert s == {
-        "skipped_content:add": len(directories),
-    }
-    s = swh_storage.directory_add(directories)
-    assert s == {
-        "directory:add": len(directories),
-    }
-    s = swh_storage.revision_add(revisions)
-    assert s == {
-        "revision:add": len(revisions),
-    }
-
-    assert len(swh_storage.objects_seen["content"]) == len(contents)
-    assert len(swh_storage.objects_seen["skipped_content"]) == len(skipped_contents)
-    assert len(swh_storage.objects_seen["directory"]) == len(directories)
-    assert len(swh_storage.objects_seen["revision"]) == len(revisions)
-
-    # clear only content from the buffer
-    s = swh_storage.clear_buffers(["content"])
-    assert s is None
-
-    # specific clear operation on specific object type content only touched
-    # them
-    assert len(swh_storage.objects_seen["content"]) == 0
-    assert len(swh_storage.objects_seen["skipped_content"]) == len(skipped_contents)
-    assert len(swh_storage.objects_seen["directory"]) == len(directories)
-    assert len(swh_storage.objects_seen["revision"]) == len(revisions)
-
-    # clear current buffer from all object types
-    s = swh_storage.clear_buffers()
-    assert s is None
-
-    assert len(swh_storage.objects_seen["content"]) == 0
-    assert len(swh_storage.objects_seen["skipped_content"]) == 0
-    assert len(swh_storage.objects_seen["directory"]) == 0
-    assert len(swh_storage.objects_seen["revision"]) == 0
-- 
GitLab