Skip to content
Snippets Groups Projects
Commit 84dcbe3d authored by David Douard's avatar David Douard
Browse files

Merge test_replay's _check_replayed and check_replayed in a single function

parent 36a7fd34
No related branches found
No related tags found
1 merge request: !665 "Make pg Storage.extid_add() write extid objects to the journal"
...@@ -19,6 +19,7 @@ from swh.storage.backfill import ( ...@@ -19,6 +19,7 @@ from swh.storage.backfill import (
compute_query, compute_query,
raw_extrinsic_metadata_target_ranges, raw_extrinsic_metadata_target_ranges,
) )
from swh.storage.in_memory import InMemoryStorage
from swh.storage.replay import process_replay_objects from swh.storage.replay import process_replay_objects
from swh.storage.tests.test_replay import check_replayed from swh.storage.tests.test_replay import check_replayed
...@@ -287,6 +288,8 @@ def test_backfiller( ...@@ -287,6 +288,8 @@ def test_backfiller(
replayer2.process(worker_fn2) replayer2.process(worker_fn2)
# Compare storages # Compare storages
assert isinstance(sto1, InMemoryStorage) # needed to help mypy
assert isinstance(sto2, InMemoryStorage)
check_replayed(sto1, sto2) check_replayed(sto1, sto2)
for record in caplog.records: for record in caplog.records:
......
...@@ -85,7 +85,9 @@ def test_storage_replayer(replayer_storage_and_client, caplog): ...@@ -85,7 +85,9 @@ def test_storage_replayer(replayer_storage_and_client, caplog):
nb_inserted = replayer.process(worker_fn) nb_inserted = replayer.process(worker_fn)
assert nb_sent == nb_inserted assert nb_sent == nb_inserted
_check_replayed(src, dst) assert isinstance(src, InMemoryStorage) # needed to help mypy
assert isinstance(dst, InMemoryStorage)
check_replayed(src, dst)
collision = 0 collision = 0
for record in caplog.records: for record in caplog.records:
...@@ -165,7 +167,9 @@ def test_storage_play_with_collision(replayer_storage_and_client, caplog): ...@@ -165,7 +167,9 @@ def test_storage_play_with_collision(replayer_storage_and_client, caplog):
assert expected_content_hashes in actual_colliding_hashes assert expected_content_hashes in actual_colliding_hashes
# all objects from the src should exists in the dst storage # all objects from the src should exists in the dst storage
_check_replayed(src, dst, exclude=["contents"]) assert isinstance(src, InMemoryStorage) # needed to help mypy
assert isinstance(dst, InMemoryStorage) # needed to help mypy
check_replayed(src, dst, exclude=["contents"])
# but the dst has one content more (one of the 2 colliding ones) # but the dst has one content more (one of the 2 colliding ones)
assert ( assert (
len(list(src._cql_runner._contents.iter_all())) len(list(src._cql_runner._contents.iter_all()))
...@@ -188,12 +192,29 @@ def test_replay_skipped_content_bwcompat(replayer_storage_and_client): ...@@ -188,12 +192,29 @@ def test_replay_skipped_content_bwcompat(replayer_storage_and_client):
# utility functions # utility functions
def _check_replayed( def check_replayed(
src: InMemoryStorage, dst: InMemoryStorage, exclude: Optional[Container] = None src: InMemoryStorage,
dst: InMemoryStorage,
exclude: Optional[Container] = None,
expected_anonymized=False,
): ):
"""Simple utility function to compare the content of 2 in_memory storages """Simple utility function to compare the content of 2 in_memory storages"""
def fix_expected(attr, row):
if expected_anonymized:
if attr == "releases":
row = dataclasses.replace(
row, author=row.author and row.author.anonymize()
)
elif attr == "revisions":
row = dataclasses.replace(
row,
author=row.author.anonymize(),
committer=row.committer.anonymize(),
)
return row
"""
for attr_ in ( for attr_ in (
"contents", "contents",
"skipped_contents", "skipped_contents",
...@@ -210,7 +231,7 @@ def _check_replayed( ...@@ -210,7 +231,7 @@ def _check_replayed(
if exclude and attr_ in exclude: if exclude and attr_ in exclude:
continue continue
expected_objects = [ expected_objects = [
(id, nullify_ctime(obj)) (id, nullify_ctime(fix_expected(attr_, obj)))
for id, obj in sorted(getattr(src._cql_runner, f"_{attr_}").iter_all()) for id, obj in sorted(getattr(src._cql_runner, f"_{attr_}").iter_all())
] ]
got_objects = [ got_objects = [
...@@ -321,46 +342,6 @@ def test_storage_play_anonymized( ...@@ -321,46 +342,6 @@ def test_storage_play_anonymized(
assert nb_sent == nb_inserted assert nb_sent == nb_inserted
# Check the contents of the destination storage, and whether the anonymization was # Check the contents of the destination storage, and whether the anonymization was
# properly used # properly used
assert isinstance(storage, InMemoryStorage) # needed to help mypy
assert isinstance(dst_storage, InMemoryStorage)
check_replayed(storage, dst_storage, expected_anonymized=not privileged) check_replayed(storage, dst_storage, expected_anonymized=not privileged)
def check_replayed(src, dst, expected_anonymized=False, exclude=None):
    """Simple utility function to compare the content of 2 in_memory storages.

    Args:
        src: source in-memory storage
        dst: destination in-memory storage
        expected_anonymized: if True, objects from the source storage are
            anonymized before comparing with the destination storage.
        exclude: optional container of attribute names (e.g. ``"contents"``)
            to skip during the comparison.

    Raises:
        AssertionError: if any object collection differs between src and dst.
    """

    def maybe_anonymize(attr_, row):
        # Apply the anonymization the replayer is expected to have performed,
        # so that src objects compare equal to dst objects.
        if expected_anonymized:
            if attr_ == "releases":
                # A release's author may be None; only anonymize when present.
                row = dataclasses.replace(
                    row, author=row.author and row.author.anonymize()
                )
            elif attr_ == "revisions":
                row = dataclasses.replace(
                    row,
                    author=row.author.anonymize(),
                    committer=row.committer.anonymize(),
                )
        return row

    for attr_ in (
        "contents",
        "skipped_contents",
        "directories",
        "revisions",
        "releases",
        "snapshots",
        "origins",
        "origin_visit_statuses",
        "raw_extrinsic_metadata",
    ):
        if exclude and attr_ in exclude:
            continue
        # ctime is storage-local bookkeeping, so it is nulled out on both
        # sides before comparison.
        expected_objects = [
            (id, nullify_ctime(maybe_anonymize(attr_, obj)))
            for id, obj in sorted(getattr(src._cql_runner, f"_{attr_}").iter_all())
        ]
        got_objects = [
            (id, nullify_ctime(obj))
            for id, obj in sorted(getattr(dst._cql_runner, f"_{attr_}").iter_all())
        ]
        assert got_objects == expected_objects, f"Mismatch object list for {attr_}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment