From 551c55ff04774b2c4e50e54f701c514064607f6a Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 4 Nov 2021 15:49:45 +0100
Subject: [PATCH] package loaders: Make test failures more helpful

Some tests did the following:

1. build a snapshot
2. get the snapshot from the storage
3. compare it with the expected snapshot
4. get the origin visit from the storage and check it

If the loader built a wrong snapshot, the test fails at step 2,
and the only information displayed is that the expected snapshot id
does not exist, which is very unhelpful.

Instead, I reordered them as: 1, 4, 2, 3. This way, if a wrong
snapshot is build by the loader, it is detected when comparing
the visit, and pytest shows the two hashes.
Then, the test can be modified to use the hash that is actually
generated to show the actual snapshot.

This is consistent with what was already done in the pypi loader.

Additionally, I made the following changes:

1. always check stats last (because a difference in numbers is
   hardly actionable without testing other objects)
2. add a few more snapshot id checks in visits
3. deduplicated a hardcoded snapshot id.
---
 .../package/archive/tests/test_archive.py     |  20 +-
 swh/loader/package/cran/tests/test_cran.py    |  20 +-
 .../package/debian/tests/test_debian.py       |  80 ++++---
 .../package/deposit/tests/test_deposit.py     |  48 ++--
 .../package/nixguix/tests/test_nixguix.py     |  31 ++-
 swh/loader/package/npm/tests/test_npm.py      |  83 +++----
 swh/loader/package/opam/tests/test_opam.py    |   7 +-
 swh/loader/package/pypi/tests/test_pypi.py    | 205 +++++++++---------
 swh/loader/tests/__init__.py                  |  16 +-
 swh/loader/tests/test_init.py                 |   4 +-
 10 files changed, 296 insertions(+), 218 deletions(-)

diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py
index a5928262..aeddcdf2 100644
--- a/swh/loader/package/archive/tests/test_archive.py
+++ b/swh/loader/package/archive/tests/test_archive.py
@@ -153,15 +153,6 @@ def test_archive_visit_with_release_artifact_no_prior_visit(
         "snapshot": 1,
     } == stats
 
-    expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit)
-    assert list(swh_storage.content_missing_per_sha1(expected_contents)) == []
-
-    expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit)
-    assert list(swh_storage.directory_missing(expected_dirs)) == []
-
-    expected_revs = map(hash_to_bytes, _expected_new_revisions_first_visit)
-    assert list(swh_storage.revision_missing(expected_revs)) == []
-
     expected_snapshot = Snapshot(
         id=expected_snapshot_first_visit_id,
         branches={
@@ -170,13 +161,22 @@ def test_archive_visit_with_release_artifact_no_prior_visit(
             ),
             b"releases/0.1.0": SnapshotBranch(
                 target_type=TargetType.REVISION,
-                target=hash_to_bytes("44183488c0774ce3c957fa19ba695cf18a4a42b3"),
+                target=hash_to_bytes(list(_expected_new_revisions_first_visit)[0]),
             ),
         },
     )
 
     check_snapshot(expected_snapshot, swh_storage)
 
+    expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit)
+    assert list(swh_storage.content_missing_per_sha1(expected_contents)) == []
+
+    expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit)
+    assert list(swh_storage.directory_missing(expected_dirs)) == []
+
+    expected_revs = map(hash_to_bytes, _expected_new_revisions_first_visit)
+    assert list(swh_storage.revision_missing(expected_revs)) == []
+
 
 def test_archive_2_visits_without_change(swh_storage, requests_mock_datadir):
     """With no prior visit, load a gnu project ends up with 1 snapshot
diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py
index 1b2d28df..9c03b99f 100644
--- a/swh/loader/package/cran/tests/test_cran.py
+++ b/swh/loader/package/cran/tests/test_cran.py
@@ -182,9 +182,11 @@ def test_cran_one_visit(swh_storage, requests_mock_datadir):
         "snapshot_id": SNAPSHOT.id.hex(),
     }
 
-    check_snapshot(SNAPSHOT, swh_storage)
+    assert_last_visit_matches(
+        swh_storage, origin_url, status="full", type="cran", snapshot=SNAPSHOT.id
+    )
 
-    assert_last_visit_matches(swh_storage, origin_url, status="full", type="cran")
+    check_snapshot(SNAPSHOT, swh_storage)
 
     visit_stats = get_stats(swh_storage)
     assert {
@@ -230,7 +232,9 @@ def test_cran_2_visits_same_origin(swh_storage, requests_mock_datadir):
 
     check_snapshot(SNAPSHOT, swh_storage)
 
-    assert_last_visit_matches(swh_storage, origin_url, status="full", type="cran")
+    assert_last_visit_matches(
+        swh_storage, origin_url, status="full", type="cran", snapshot=SNAPSHOT.id
+    )
 
     visit_stats = get_stats(swh_storage)
     assert {
@@ -252,7 +256,13 @@ def test_cran_2_visits_same_origin(swh_storage, requests_mock_datadir):
         "snapshot_id": expected_snapshot_id,
     }
 
-    assert_last_visit_matches(swh_storage, origin_url, status="full", type="cran")
+    assert_last_visit_matches(
+        swh_storage,
+        origin_url,
+        status="full",
+        type="cran",
+        snapshot=hash_to_bytes(expected_snapshot_id),
+    )
 
     visit_stats2 = get_stats(swh_storage)
     visit_stats["origin_visit"] += 1
@@ -360,5 +370,5 @@ def test_cran_fail_to_build_or_load_extrinsic_metadata(
         } == visit_stats
 
         assert_last_visit_matches(
-            swh_storage, origin_url, status="partial", type="cran"
+            swh_storage, origin_url, status="partial", type="cran", snapshot=SNAPSHOT.id
         )
diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py
index 04a35dd8..21a79e47 100644
--- a/swh/loader/package/debian/tests/test_debian.py
+++ b/swh/loader/package/debian/tests/test_debian.py
@@ -116,19 +116,13 @@ def test_debian_first_visit(swh_storage, requests_mock_datadir):
         "snapshot_id": expected_snapshot_id,
     }
 
-    assert_last_visit_matches(swh_storage, URL, status="full", type="deb")
-
-    stats = get_stats(swh_storage)
-    assert {
-        "content": 42,
-        "directory": 2,
-        "origin": 1,
-        "origin_visit": 1,
-        "release": 0,
-        "revision": 1,  # all artifacts under 1 revision
-        "skipped_content": 0,
-        "snapshot": 1,
-    } == stats
+    assert_last_visit_matches(
+        swh_storage,
+        URL,
+        status="full",
+        type="deb",
+        snapshot=hash_to_bytes(expected_snapshot_id),
+    )
 
     expected_snapshot = Snapshot(
         id=hash_to_bytes(expected_snapshot_id),
@@ -142,6 +136,18 @@ def test_debian_first_visit(swh_storage, requests_mock_datadir):
 
     check_snapshot(expected_snapshot, swh_storage)
 
+    stats = get_stats(swh_storage)
+    assert {
+        "content": 42,
+        "directory": 2,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 1,  # all artifacts under 1 revision
+        "skipped_content": 0,
+        "snapshot": 1,
+    } == stats
+
 
 def test_debian_first_visit_then_another_visit(swh_storage, requests_mock_datadir):
     """With no prior visit, load a debian project ends up with 1 snapshot
@@ -162,19 +168,13 @@ def test_debian_first_visit_then_another_visit(swh_storage, requests_mock_datadi
         "snapshot_id": expected_snapshot_id,
     }
 
-    assert_last_visit_matches(swh_storage, URL, status="full", type="deb")
-
-    stats = get_stats(swh_storage)
-    assert {
-        "content": 42,
-        "directory": 2,
-        "origin": 1,
-        "origin_visit": 1,
-        "release": 0,
-        "revision": 1,  # all artifacts under 1 revision
-        "skipped_content": 0,
-        "snapshot": 1,
-    } == stats
+    assert_last_visit_matches(
+        swh_storage,
+        URL,
+        status="full",
+        type="deb",
+        snapshot=hash_to_bytes(expected_snapshot_id),
+    )
 
     expected_snapshot = Snapshot(
         id=hash_to_bytes(expected_snapshot_id),
@@ -188,10 +188,28 @@ def test_debian_first_visit_then_another_visit(swh_storage, requests_mock_datadi
 
     check_snapshot(expected_snapshot, swh_storage)
 
+    stats = get_stats(swh_storage)
+    assert {
+        "content": 42,
+        "directory": 2,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 1,  # all artifacts under 1 revision
+        "skipped_content": 0,
+        "snapshot": 1,
+    } == stats
+
     # No change in between load
     actual_load_status2 = loader.load()
     assert actual_load_status2["status"] == "uneventful"
-    assert_last_visit_matches(swh_storage, URL, status="full", type="deb")
+    assert_last_visit_matches(
+        swh_storage,
+        URL,
+        status="full",
+        type="deb",
+        snapshot=hash_to_bytes(expected_snapshot_id),
+    )
 
     stats2 = get_stats(swh_storage)
     assert {
@@ -406,7 +424,13 @@ def test_debian_multiple_packages(swh_storage, requests_mock_datadir):
         "snapshot_id": expected_snapshot_id,
     }
 
-    assert_last_visit_matches(swh_storage, URL, status="full", type="deb")
+    assert_last_visit_matches(
+        swh_storage,
+        URL,
+        status="full",
+        type="deb",
+        snapshot=hash_to_bytes(expected_snapshot_id),
+    )
 
     expected_snapshot = Snapshot(
         id=hash_to_bytes(expected_snapshot_id),
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
index 80a4ff74..f65cb5f9 100644
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -177,19 +177,13 @@ def test_deposit_loading_ok(swh_storage, deposit_client, requests_mock_datadir):
         "snapshot_id": expected_snapshot_id,
     }
 
-    assert_last_visit_matches(loader.storage, url, status="full", type="deposit")
-
-    stats = get_stats(loader.storage)
-    assert {
-        "content": 303,
-        "directory": 12,
-        "origin": 1,
-        "origin_visit": 1,
-        "release": 0,
-        "revision": 1,
-        "skipped_content": 0,
-        "snapshot": 1,
-    } == stats
+    assert_last_visit_matches(
+        loader.storage,
+        url,
+        status="full",
+        type="deposit",
+        snapshot=hash_to_bytes(expected_snapshot_id),
+    )
 
     revision_id_hex = "637318680351f5d78856d13264faebbd91efe9bb"
     revision_id = hash_to_bytes(revision_id_hex)
@@ -287,6 +281,18 @@ def test_deposit_loading_ok(swh_storage, deposit_client, requests_mock_datadir):
 
     assert body == expected_body
 
+    stats = get_stats(loader.storage)
+    assert {
+        "content": 303,
+        "directory": 12,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 1,
+        "skipped_content": 0,
+        "snapshot": 1,
+    } == stats
+
 
 def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir):
     """Field dates should be se appropriately
@@ -306,7 +312,13 @@ def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir
         "status": "eventful",
         "snapshot_id": expected_snapshot_id,
     }
-    assert_last_visit_matches(loader.storage, url, status="full", type="deposit")
+    assert_last_visit_matches(
+        loader.storage,
+        url,
+        status="full",
+        type="deposit",
+        snapshot=hash_to_bytes(expected_snapshot_id),
+    )
 
     revision_id = "564d18943d71be80d0d73b43a77cfb205bcde96c"
     expected_snapshot = Snapshot(
@@ -479,4 +491,10 @@ def test_deposit_loading_ok_3(swh_storage, deposit_client, requests_mock_datadir
         "status": "eventful",
         "snapshot_id": expected_snapshot_id,
     }
-    assert_last_visit_matches(loader.storage, url, status="full", type="deposit")
+    assert_last_visit_matches(
+        loader.storage,
+        url,
+        status="full",
+        type="deposit",
+        snapshot=hash_to_bytes(expected_snapshot_id),
+    )
diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py
index 51487086..12fcf6b7 100644
--- a/swh/loader/package/nixguix/tests/test_nixguix.py
+++ b/swh/loader/package/nixguix/tests/test_nixguix.py
@@ -536,15 +536,19 @@ def test_raise_exception(swh_storage, requests_mock_datadir, mocker):
         "snapshot_id": SNAPSHOT1.id.hex(),
     }
 
-    check_snapshot(SNAPSHOT1, storage=swh_storage)
-
-    assert len(mock_download.mock_calls) == 2
-
     # The visit is partial because some artifact downloads failed
     assert_last_visit_matches(
-        swh_storage, sources_url, status="partial", type="nixguix"
+        swh_storage,
+        sources_url,
+        status="partial",
+        type="nixguix",
+        snapshot=SNAPSHOT1.id,
     )
 
+    check_snapshot(SNAPSHOT1, storage=swh_storage)
+
+    assert len(mock_download.mock_calls) == 2
+
 
 def test_load_nixguix_one_common_artifact_from_other_loader(
     swh_storage, datadir, requests_mock_datadir_visits, caplog
@@ -574,7 +578,11 @@ def test_load_nixguix_one_common_artifact_from_other_loader(
     assert actual_load_status["snapshot_id"] == expected_snapshot_id  # noqa
 
     assert_last_visit_matches(
-        archive_loader.storage, gnu_url, status="full", type="tar"
+        archive_loader.storage,
+        gnu_url,
+        status="full",
+        type="tar",
+        snapshot=hash_to_bytes(expected_snapshot_id),
     )
 
     # 2. Then ingest with the nixguix loader which lists the same artifact within its
@@ -599,8 +607,15 @@ def test_load_nixguix_one_common_artifact_from_other_loader(
     actual_load_status2 = loader.load()
     assert actual_load_status2["status"] == "eventful"
 
-    assert_last_visit_matches(swh_storage, sources_url, status="full", type="nixguix")
-
     snapshot_id = actual_load_status2["snapshot_id"]
+
+    assert_last_visit_matches(
+        swh_storage,
+        sources_url,
+        status="full",
+        type="nixguix",
+        snapshot=hash_to_bytes(snapshot_id),
+    )
+
     snapshot = snapshot_get_all_branches(swh_storage, hash_to_bytes(snapshot_id))
     assert snapshot
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
index 25f10e8a..000fb15e 100644
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -318,29 +318,6 @@ def test_npm_loader_first_visit(swh_storage, requests_mock_datadir, org_api_info
         swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
     )
 
-    stats = get_stats(swh_storage)
-
-    assert {
-        "content": len(_expected_new_contents_first_visit),
-        "directory": len(_expected_new_directories_first_visit),
-        "origin": 1,
-        "origin_visit": 1,
-        "release": 0,
-        "revision": len(_expected_new_revisions_first_visit),
-        "skipped_content": 0,
-        "snapshot": 1,
-    } == stats
-
-    contents = swh_storage.content_get(_expected_new_contents_first_visit)
-    count = sum(0 if content is None else 1 for content in contents)
-    assert count == len(_expected_new_contents_first_visit)
-
-    assert (
-        list(swh_storage.directory_missing(_expected_new_directories_first_visit)) == []
-    )
-
-    assert list(swh_storage.revision_missing(_expected_new_revisions_first_visit)) == []
-
     versions = [
         ("0.0.2", "d8a1c7474d2956ac598a19f0f27d52f7015f117e"),
         ("0.0.3", "5f9eb78af37ffd12949f235e86fac04898f9f72a"),
@@ -364,6 +341,16 @@ def test_npm_loader_first_visit(swh_storage, requests_mock_datadir, org_api_info
     )
     check_snapshot(expected_snapshot, swh_storage)
 
+    contents = swh_storage.content_get(_expected_new_contents_first_visit)
+    count = sum(0 if content is None else 1 for content in contents)
+    assert count == len(_expected_new_contents_first_visit)
+
+    assert (
+        list(swh_storage.directory_missing(_expected_new_directories_first_visit)) == []
+    )
+
+    assert list(swh_storage.revision_missing(_expected_new_revisions_first_visit)) == []
+
     metadata_authority = MetadataAuthority(
         type=MetadataAuthorityType.FORGE, url="https://npmjs.com/",
     )
@@ -397,6 +384,19 @@ def test_npm_loader_first_visit(swh_storage, requests_mock_datadir, org_api_info
             directory_swhid, metadata_authority,
         ) == PagedResult(next_page_token=None, results=expected_metadata,)
 
+    stats = get_stats(swh_storage)
+
+    assert {
+        "content": len(_expected_new_contents_first_visit),
+        "directory": len(_expected_new_directories_first_visit),
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": len(_expected_new_revisions_first_visit),
+        "skipped_content": 0,
+        "snapshot": 1,
+    } == stats
+
 
 def test_npm_loader_incremental_visit(swh_storage, requests_mock_datadir_visits):
     package = "org"
@@ -475,19 +475,6 @@ def test_npm_loader_version_divergence(swh_storage):
         swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id
     )
 
-    stats = get_stats(swh_storage)
-
-    assert {  # 1 new releases artifacts
-        "content": 534,
-        "directory": 153,
-        "origin": 1,
-        "origin_visit": 1,
-        "release": 0,
-        "revision": 2,
-        "skipped_content": 0,
-        "snapshot": 1,
-    } == stats
-
     expected_snapshot = Snapshot(
         id=expected_snapshot_id,
         branches={
@@ -506,6 +493,19 @@ def test_npm_loader_version_divergence(swh_storage):
     )
     check_snapshot(expected_snapshot, swh_storage)
 
+    stats = get_stats(swh_storage)
+
+    assert {  # 1 new releases artifacts
+        "content": 534,
+        "directory": 153,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 2,
+        "skipped_content": 0,
+        "snapshot": 1,
+    } == stats
+
 
 def test_npm_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir):
     """Skip artifact with no intrinsic metadata during ingestion
@@ -525,12 +525,12 @@ def test_npm_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_data
         "snapshot_id": expected_snapshot.id.hex(),
     }
 
-    check_snapshot(expected_snapshot, swh_storage)
-
     assert_last_visit_matches(
         swh_storage, url, status="full", type="npm", snapshot=expected_snapshot.id
     )
 
+    check_snapshot(expected_snapshot, swh_storage)
+
 
 def test_npm_artifact_with_no_upload_time(swh_storage, requests_mock_datadir):
     """With no time upload, artifact is skipped
@@ -550,12 +550,12 @@ def test_npm_artifact_with_no_upload_time(swh_storage, requests_mock_datadir):
         "snapshot_id": expected_snapshot.id.hex(),
     }
 
-    check_snapshot(expected_snapshot, swh_storage)
-
     assert_last_visit_matches(
         swh_storage, url, status="partial", type="npm", snapshot=expected_snapshot.id
     )
 
+    check_snapshot(expected_snapshot, swh_storage)
+
 
 def test_npm_artifact_use_mtime_if_no_time(swh_storage, requests_mock_datadir):
     """With no time upload, artifact is skipped
@@ -586,12 +586,13 @@ def test_npm_artifact_use_mtime_if_no_time(swh_storage, requests_mock_datadir):
             ),
         },
     )
-    check_snapshot(expected_snapshot, swh_storage)
 
     assert_last_visit_matches(
         swh_storage, url, status="full", type="npm", snapshot=expected_snapshot.id
     )
 
+    check_snapshot(expected_snapshot, swh_storage)
+
 
 def test_npm_no_artifact(swh_storage, requests_mock_datadir):
     """If no artifacts at all is found for origin, the visit fails completely
diff --git a/swh/loader/package/opam/tests/test_opam.py b/swh/loader/package/opam/tests/test_opam.py
index 4f99e824..b73dbeb1 100644
--- a/swh/loader/package/opam/tests/test_opam.py
+++ b/swh/loader/package/opam/tests/test_opam.py
@@ -127,12 +127,13 @@ def test_opam_loader_one_version(tmpdir, requests_mock_datadir, datadir, swh_sto
             ),
         },
     )
-    check_snapshot(expected_snapshot, swh_storage)
 
     assert_last_visit_matches(
         swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id
     )
 
+    check_snapshot(expected_snapshot, swh_storage)
+
     stats = get_stats(swh_storage)
 
     assert {
@@ -195,12 +196,12 @@ def test_opam_loader_many_version(tmpdir, requests_mock_datadir, datadir, swh_st
         },
     )
 
-    check_snapshot(expected_snapshot, swh_storage)
-
     assert_last_visit_matches(
         swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id
     )
 
+    check_snapshot(expected_snapshot, swh_storage)
+
 
 def test_opam_revision(tmpdir, requests_mock_datadir, swh_storage, datadir):
 
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
index 3298864f..e8871a6d 100644
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -218,6 +218,12 @@ def test_pypi_no_release_artifact(swh_storage, requests_mock_datadir_missing_all
     assert actual_load_status["status"] == "uneventful"
     assert actual_load_status["snapshot_id"] is not None
 
+    empty_snapshot = Snapshot(branches={})
+
+    assert_last_visit_matches(
+        swh_storage, url, status="partial", type="pypi", snapshot=empty_snapshot.id
+    )
+
     stats = get_stats(swh_storage)
     assert {
         "content": 0,
@@ -230,8 +236,6 @@ def test_pypi_no_release_artifact(swh_storage, requests_mock_datadir_missing_all
         "snapshot": 1,
     } == stats
 
-    assert_last_visit_matches(swh_storage, url, status="partial", type="pypi")
-
 
 def test_pypi_fail__load_snapshot(swh_storage, requests_mock_datadir):
     """problem during loading: {visit: failed, status: failed, no snapshot}
@@ -247,6 +251,8 @@ def test_pypi_fail__load_snapshot(swh_storage, requests_mock_datadir):
         actual_load_status = loader.load()
         assert actual_load_status == {"status": "failed"}
 
+        assert_last_visit_matches(swh_storage, url, status="failed", type="pypi")
+
         stats = get_stats(loader.storage)
 
         assert {
@@ -260,8 +266,6 @@ def test_pypi_fail__load_snapshot(swh_storage, requests_mock_datadir):
             "snapshot": 0,
         } == stats
 
-        assert_last_visit_matches(swh_storage, url, status="failed", type="pypi")
-
 
 # problem during loading:
 # {visit: partial, status: uneventful, no snapshot}
@@ -278,6 +282,8 @@ def test_pypi_release_with_traceback(swh_storage, requests_mock_datadir):
         actual_load_status = loader.load()
         assert actual_load_status == {"status": "failed"}
 
+        assert_last_visit_matches(swh_storage, url, status="failed", type="pypi")
+
         stats = get_stats(swh_storage)
 
         assert {
@@ -291,8 +297,6 @@ def test_pypi_release_with_traceback(swh_storage, requests_mock_datadir):
             "snapshot": 0,
         } == stats
 
-        assert_last_visit_matches(swh_storage, url, status="failed", type="pypi")
-
 
 # problem during loading: failure early enough in between swh contents...
 # some contents (contents, directories, etc...) have been written in storage
@@ -375,18 +379,23 @@ def test_pypi_visit_with_missing_artifact(
         "snapshot_id": expected_snapshot_id.hex(),
     }
 
-    stats = get_stats(swh_storage)
+    assert_last_visit_matches(
+        swh_storage, url, status="partial", type="pypi", snapshot=expected_snapshot_id,
+    )
 
-    assert {
-        "content": 3,
-        "directory": 2,
-        "origin": 1,
-        "origin_visit": 1,
-        "release": 0,
-        "revision": 1,
-        "skipped_content": 0,
-        "snapshot": 1,
-    } == stats
+    expected_snapshot = Snapshot(
+        id=hash_to_bytes(expected_snapshot_id),
+        branches={
+            b"releases/1.2.0": SnapshotBranch(
+                target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"),
+                target_type=TargetType.REVISION,
+            ),
+            b"HEAD": SnapshotBranch(
+                target=b"releases/1.2.0", target_type=TargetType.ALIAS,
+            ),
+        },
+    )
+    check_snapshot(expected_snapshot, storage=swh_storage)
 
     expected_contents = map(
         hash_to_bytes,
@@ -417,23 +426,18 @@ def test_pypi_visit_with_missing_artifact(
     }
     assert list(swh_storage.revision_missing(expected_revs)) == []
 
-    expected_snapshot = Snapshot(
-        id=hash_to_bytes(expected_snapshot_id),
-        branches={
-            b"releases/1.2.0": SnapshotBranch(
-                target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"),
-                target_type=TargetType.REVISION,
-            ),
-            b"HEAD": SnapshotBranch(
-                target=b"releases/1.2.0", target_type=TargetType.ALIAS,
-            ),
-        },
-    )
-    check_snapshot(expected_snapshot, storage=swh_storage)
+    stats = get_stats(swh_storage)
 
-    assert_last_visit_matches(
-        swh_storage, url, status="partial", type="pypi", snapshot=expected_snapshot_id,
-    )
+    assert {
+        "content": 3,
+        "directory": 2,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 1,
+        "skipped_content": 0,
+        "snapshot": 1,
+    } == stats
 
 
 def test_pypi_visit_with_1_release_artifact(swh_storage, requests_mock_datadir):
@@ -450,6 +454,28 @@ def test_pypi_visit_with_1_release_artifact(swh_storage, requests_mock_datadir):
         "snapshot_id": expected_snapshot_id.hex(),
     }
 
+    assert_last_visit_matches(
+        swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id
+    )
+
+    expected_snapshot = Snapshot(
+        id=expected_snapshot_id,
+        branches={
+            b"releases/1.1.0": SnapshotBranch(
+                target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"),
+                target_type=TargetType.REVISION,
+            ),
+            b"releases/1.2.0": SnapshotBranch(
+                target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"),
+                target_type=TargetType.REVISION,
+            ),
+            b"HEAD": SnapshotBranch(
+                target=b"releases/1.2.0", target_type=TargetType.ALIAS,
+            ),
+        },
+    )
+    check_snapshot(expected_snapshot, swh_storage)
+
     stats = get_stats(swh_storage)
     assert {
         "content": 6,
@@ -499,28 +525,6 @@ def test_pypi_visit_with_1_release_artifact(swh_storage, requests_mock_datadir):
     }
     assert list(swh_storage.revision_missing(expected_revs)) == []
 
-    expected_snapshot = Snapshot(
-        id=expected_snapshot_id,
-        branches={
-            b"releases/1.1.0": SnapshotBranch(
-                target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"),
-                target_type=TargetType.REVISION,
-            ),
-            b"releases/1.2.0": SnapshotBranch(
-                target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"),
-                target_type=TargetType.REVISION,
-            ),
-            b"HEAD": SnapshotBranch(
-                target=b"releases/1.2.0", target_type=TargetType.ALIAS,
-            ),
-        },
-    )
-    check_snapshot(expected_snapshot, swh_storage)
-
-    assert_last_visit_matches(
-        swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id
-    )
-
 
 def test_pypi_multiple_visits_with_no_change(swh_storage, requests_mock_datadir):
     """Multiple visits with no changes results in 1 same snapshot
@@ -539,19 +543,6 @@ def test_pypi_multiple_visits_with_no_change(swh_storage, requests_mock_datadir)
         swh_storage, url, status="full", type="pypi", snapshot=snapshot_id
     )
 
-    stats = get_stats(swh_storage)
-
-    assert {
-        "content": 6,
-        "directory": 4,
-        "origin": 1,
-        "origin_visit": 1,
-        "release": 0,
-        "revision": 2,
-        "skipped_content": 0,
-        "snapshot": 1,
-    } == stats
-
     expected_snapshot = Snapshot(
         id=snapshot_id,
         branches={
@@ -570,6 +561,19 @@ def test_pypi_multiple_visits_with_no_change(swh_storage, requests_mock_datadir)
     )
     check_snapshot(expected_snapshot, swh_storage)
 
+    stats = get_stats(swh_storage)
+
+    assert {
+        "content": 6,
+        "directory": 4,
+        "origin": 1,
+        "origin_visit": 1,
+        "release": 0,
+        "revision": 2,
+        "skipped_content": 0,
+        "snapshot": 1,
+    } == stats
+
     actual_load_status2 = loader.load()
     assert actual_load_status2 == {
         "status": "uneventful",
@@ -637,6 +641,33 @@ def test_pypi_incremental_visit(swh_storage, requests_mock_datadir_visits):
         swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id2
     )
 
+    expected_snapshot = Snapshot(
+        id=expected_snapshot_id2,
+        branches={
+            b"releases/1.1.0": SnapshotBranch(
+                target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"),
+                target_type=TargetType.REVISION,
+            ),
+            b"releases/1.2.0": SnapshotBranch(
+                target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"),
+                target_type=TargetType.REVISION,
+            ),
+            b"releases/1.3.0": SnapshotBranch(
+                target=hash_to_bytes("51247143b01445c9348afa9edfae31bf7c5d86b1"),
+                target_type=TargetType.REVISION,
+            ),
+            b"HEAD": SnapshotBranch(
+                target=b"releases/1.3.0", target_type=TargetType.ALIAS,
+            ),
+        },
+    )
+
+    assert_last_visit_matches(
+        swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id
+    )
+
+    check_snapshot(expected_snapshot, swh_storage)
+
     assert {
         "content": 6 + 1,  # 1 more content
         "directory": 4 + 2,  # 2 more directories
@@ -692,33 +723,6 @@ def test_pypi_incremental_visit(swh_storage, requests_mock_datadir_visits):
 
     assert list(swh_storage.revision_missing(expected_revs)) == []
 
-    expected_snapshot = Snapshot(
-        id=expected_snapshot_id2,
-        branches={
-            b"releases/1.1.0": SnapshotBranch(
-                target=hash_to_bytes("4c99891f93b81450385777235a37b5e966dd1571"),
-                target_type=TargetType.REVISION,
-            ),
-            b"releases/1.2.0": SnapshotBranch(
-                target=hash_to_bytes("e445da4da22b31bfebb6ffc4383dbf839a074d21"),
-                target_type=TargetType.REVISION,
-            ),
-            b"releases/1.3.0": SnapshotBranch(
-                target=hash_to_bytes("51247143b01445c9348afa9edfae31bf7c5d86b1"),
-                target_type=TargetType.REVISION,
-            ),
-            b"HEAD": SnapshotBranch(
-                target=b"releases/1.3.0", target_type=TargetType.ALIAS,
-            ),
-        },
-    )
-
-    check_snapshot(expected_snapshot, swh_storage)
-
-    assert_last_visit_matches(
-        swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id
-    )
-
     urls = [
         m.url
         for m in requests_mock_datadir_visits.request_history
@@ -753,6 +757,10 @@ def test_pypi_visit_1_release_with_2_artifacts(swh_storage, requests_mock_datadi
         "snapshot_id": expected_snapshot_id.hex(),
     }
 
+    assert_last_visit_matches(
+        swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot_id
+    )
+
     expected_snapshot = Snapshot(
         id=expected_snapshot_id,
         branches={
@@ -768,10 +776,6 @@ def test_pypi_visit_1_release_with_2_artifacts(swh_storage, requests_mock_datadi
     )
     check_snapshot(expected_snapshot, swh_storage)
 
-    assert_last_visit_matches(
-        swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id
-    )
-
 
 def test_pypi_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_datadir):
     """Skip artifact with no intrinsic metadata during ingestion
@@ -789,12 +793,13 @@ def test_pypi_artifact_with_no_intrinsic_metadata(swh_storage, requests_mock_dat
 
     # no branch as one artifact without any intrinsic metadata
     expected_snapshot = Snapshot(id=expected_snapshot_id, branches={})
-    check_snapshot(expected_snapshot, swh_storage)
 
     assert_last_visit_matches(
         swh_storage, url, status="full", type="pypi", snapshot=expected_snapshot.id
     )
 
+    check_snapshot(expected_snapshot, swh_storage)
+
 
 def test_pypi_origin_not_found(swh_storage, requests_mock_datadir):
     url = "https://pypi.org/project/unknown"
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
index a9601ff3..fc574fcf 100644
--- a/swh/loader/tests/__init__.py
+++ b/swh/loader/tests/__init__.py
@@ -115,7 +115,7 @@ class InexistentObjectsError(AssertionError):
 
 
 def check_snapshot(
-    snapshot: Snapshot,
+    expected_snapshot: Snapshot,
     storage: StorageInterface,
     allowed_empty: Iterable[Tuple[TargetType, bytes]] = [],
 ) -> Snapshot:
@@ -124,7 +124,7 @@ def check_snapshot(
     - each object reference up to the revision/release targets exists
 
     Args:
-        snapshot: full snapshot to check for existence and consistency
+        expected_snapshot: full snapshot to check for existence and consistency
         storage: storage to lookup information into
         allowed_empty: Iterable of branch we allow to be empty (some edge case loaders
           allows this case to happen, nixguix for example allows the branch evaluation"
@@ -136,12 +136,14 @@ def check_snapshot(
         needed.
 
     """
-    if not isinstance(snapshot, Snapshot):
-        raise AssertionError(f"variable 'snapshot' must be a snapshot: {snapshot!r}")
+    if not isinstance(expected_snapshot, Snapshot):
+        raise AssertionError(
+            f"argument 'expected_snapshot' must be a snapshot: {expected_snapshot!r}"
+        )
 
-    expected_snapshot = snapshot_get_all_branches(storage, snapshot.id)
-    if expected_snapshot is None:
-        raise AssertionError(f"Snapshot {snapshot.id.hex()} is not found")
+    snapshot = snapshot_get_all_branches(storage, expected_snapshot.id)
+    if snapshot is None:
+        raise AssertionError(f"Snapshot {expected_snapshot.id.hex()} is not found")
 
     assert snapshot == expected_snapshot
 
diff --git a/swh/loader/tests/test_init.py b/swh/loader/tests/test_init.py
index 78226b42..8f660e70 100644
--- a/swh/loader/tests/test_init.py
+++ b/swh/loader/tests/test_init.py
@@ -400,7 +400,9 @@ def test_check_snapshot_failures(swh_storage):
     )
 
     # 0. not a Snapshot object, raise!
-    with pytest.raises(AssertionError, match="variable 'snapshot' must be a snapshot"):
+    with pytest.raises(
+        AssertionError, match="argument 'expected_snapshot' must be a snapshot"
+    ):
         check_snapshot(ORIGIN_VISIT, swh_storage)
 
     # 1. snapshot id is correct but branches mismatched
-- 
GitLab