From 84dcbe3d0e567157aa03e74ce724e8f3b4bc1f02 Mon Sep 17 00:00:00 2001
From: David Douard <david.douard@sdfa3.org>
Date: Fri, 2 Apr 2021 12:56:53 +0200
Subject: [PATCH] Merge test_replay's _check_replayed and check_replayed into a
 single function

---
 swh/storage/tests/test_backfill.py |  3 ++
 swh/storage/tests/test_replay.py   | 79 ++++++++++++------------------
 2 files changed, 33 insertions(+), 49 deletions(-)

diff --git a/swh/storage/tests/test_backfill.py b/swh/storage/tests/test_backfill.py
index d228ca8d5..6865aa6c0 100644
--- a/swh/storage/tests/test_backfill.py
+++ b/swh/storage/tests/test_backfill.py
@@ -19,6 +19,7 @@ from swh.storage.backfill import (
     compute_query,
     raw_extrinsic_metadata_target_ranges,
 )
+from swh.storage.in_memory import InMemoryStorage
 from swh.storage.replay import process_replay_objects
 from swh.storage.tests.test_replay import check_replayed
 
@@ -287,6 +288,8 @@ def test_backfiller(
     replayer2.process(worker_fn2)
 
     # Compare storages
+    assert isinstance(sto1, InMemoryStorage)  # needed to help mypy
+    assert isinstance(sto2, InMemoryStorage)
     check_replayed(sto1, sto2)
 
     for record in caplog.records:
diff --git a/swh/storage/tests/test_replay.py b/swh/storage/tests/test_replay.py
index a01589b28..cda12ed34 100644
--- a/swh/storage/tests/test_replay.py
+++ b/swh/storage/tests/test_replay.py
@@ -85,7 +85,9 @@ def test_storage_replayer(replayer_storage_and_client, caplog):
     nb_inserted = replayer.process(worker_fn)
     assert nb_sent == nb_inserted
 
-    _check_replayed(src, dst)
+    assert isinstance(src, InMemoryStorage)  # needed to help mypy
+    assert isinstance(dst, InMemoryStorage)
+    check_replayed(src, dst)
 
     collision = 0
     for record in caplog.records:
@@ -165,7 +167,9 @@ def test_storage_play_with_collision(replayer_storage_and_client, caplog):
         assert expected_content_hashes in actual_colliding_hashes
 
     # all objects from the src should exists in the dst storage
-    _check_replayed(src, dst, exclude=["contents"])
+    assert isinstance(src, InMemoryStorage)  # needed to help mypy
+    assert isinstance(dst, InMemoryStorage)  # needed to help mypy
+    check_replayed(src, dst, exclude=["contents"])
     # but the dst has one content more (one of the 2 colliding ones)
     assert (
         len(list(src._cql_runner._contents.iter_all()))
@@ -188,12 +192,29 @@ def test_replay_skipped_content_bwcompat(replayer_storage_and_client):
 # utility functions
 
 
-def _check_replayed(
-    src: InMemoryStorage, dst: InMemoryStorage, exclude: Optional[Container] = None
+def check_replayed(
+    src: InMemoryStorage,
+    dst: InMemoryStorage,
+    exclude: Optional[Container] = None,
+    expected_anonymized=False,
 ):
-    """Simple utility function to compare the content of 2 in_memory storages
+    """Simple utility function to compare the content of 2 in_memory storages"""
+
+    def fix_expected(attr, row):
+        if expected_anonymized:
+            if attr == "releases":
+                row = dataclasses.replace(
+                    row, author=row.author and row.author.anonymize()
+                )
+            elif attr == "revisions":
+                row = dataclasses.replace(
+                    row,
+                    author=row.author.anonymize(),
+                    committer=row.committer.anonymize(),
+                )
+
+        return row
 
-    """
     for attr_ in (
         "contents",
         "skipped_contents",
@@ -210,7 +231,7 @@ def _check_replayed(
         if exclude and attr_ in exclude:
             continue
         expected_objects = [
-            (id, nullify_ctime(obj))
+            (id, nullify_ctime(fix_expected(attr_, obj)))
             for id, obj in sorted(getattr(src._cql_runner, f"_{attr_}").iter_all())
         ]
         got_objects = [
@@ -321,46 +342,6 @@ def test_storage_play_anonymized(
     assert nb_sent == nb_inserted
     # Check the contents of the destination storage, and whether the anonymization was
     # properly used
+    assert isinstance(storage, InMemoryStorage)  # needed to help mypy
+    assert isinstance(dst_storage, InMemoryStorage)
     check_replayed(storage, dst_storage, expected_anonymized=not privileged)
-
-
-def check_replayed(src, dst, expected_anonymized=False):
-    """Simple utility function to compare the content of 2 in_memory storages
-
-    If expected_anonymized is True, objects from the source storage are anonymized
-    before comparing with the destination storage.
-
-    """
-
-    def maybe_anonymize(attr_, row):
-        if expected_anonymized:
-            if attr_ == "releases":
-                row = dataclasses.replace(row, author=row.author.anonymize())
-            elif attr_ == "revisions":
-                row = dataclasses.replace(
-                    row,
-                    author=row.author.anonymize(),
-                    committer=row.committer.anonymize(),
-                )
-        return row
-
-    for attr_ in (
-        "contents",
-        "skipped_contents",
-        "directories",
-        "revisions",
-        "releases",
-        "snapshots",
-        "origins",
-        "origin_visit_statuses",
-        "raw_extrinsic_metadata",
-    ):
-        expected_objects = [
-            (id, nullify_ctime(maybe_anonymize(attr_, obj)))
-            for id, obj in sorted(getattr(src._cql_runner, f"_{attr_}").iter_all())
-        ]
-        got_objects = [
-            (id, nullify_ctime(obj))
-            for id, obj in sorted(getattr(dst._cql_runner, f"_{attr_}").iter_all())
-        ]
-        assert got_objects == expected_objects, f"Mismatch object list for {attr_}"
-- 
GitLab