From 4316bcbbec36bb93e66f066a8e727ff6853e2f71 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Mon, 3 Jun 2024 14:28:41 +0200
Subject: [PATCH] loader: Fetch the latest snapshot produced by the same
 visit type

The SWH data model allows an origin to have multiple visit types, so a
loader must retrieve the latest snapshot produced by a visit of the same
type as the one it performs.

Related to swh/meta#5092.
---
 requirements-swh.txt                    |  2 +-
 requirements-test.txt                   |  2 +-
 swh/loader/core/loader.py               | 12 ++++++++----
 swh/loader/package/loader.py            |  6 +++++-
 swh/loader/package/tests/test_loader.py |  4 ++--
 5 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/requirements-swh.txt b/requirements-swh.txt
index 540c505b..83806782 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,4 +2,4 @@ swh.core >= 2.23.0
 swh.model >= 6.13.0
 swh.objstorage >= 0.2.2
 swh.scheduler >= 2.3.0
-swh.storage >= 2.0.0
+swh.storage >= 2.4.1
diff --git a/requirements-test.txt b/requirements-test.txt
index 63d66e7c..ac229199 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -4,7 +4,7 @@ requests_mock >= 1.11.0
 urllib3
 swh-core[testing]
 swh.scheduler[testing] >= 2.3.0
-swh-storage[testing] >= 2.0.0
+swh-storage[testing] >= 2.4.1
 swh-vault >= 1.12.2
 types-click
 types-python-dateutil
diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
index 67d9b035..66dd0f8a 100644
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -707,13 +707,17 @@ class NodeLoader(BaseLoader, ABC):
         self.log.debug("Loader checksums computation: %s", self.checksum_layout)
 
     def prepare(self) -> None:
-        self.last_snapshot = snapshot_get_latest(self.storage, self.origin.url)
+        self.last_snapshot = snapshot_get_latest(
+            self.storage,
+            self.origin.url,
+            visit_type=self.visit_type,
+        )
 
     def load_status(self) -> Dict[str, Any]:
         return {
-            "status": "uneventful"
-            if self.last_snapshot == self.snapshot
-            else "eventful"
+            "status": (
+                "uneventful" if self.last_snapshot == self.snapshot else "eventful"
+            )
         }
 
     def cleanup(self) -> None:
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
index aa96d3dd..33ad8c07 100644
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -228,7 +228,11 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]):
 
     def last_snapshot(self) -> Optional[Snapshot]:
         """Retrieve the last snapshot out of the last visit."""
-        return snapshot_get_latest(self.storage, self.origin.url)
+        return snapshot_get_latest(
+            self.storage,
+            self.origin.url,
+            visit_type=self.visit_type,
+        )
 
     def new_packageinfo_to_extid(self, p_info: TPackageInfo) -> Optional[PartialExtID]:
         return p_info.extid()
diff --git a/swh/loader/package/tests/test_loader.py b/swh/loader/package/tests/test_loader.py
index 1669e5da..1e520b55 100644
--- a/swh/loader/package/tests/test_loader.py
+++ b/swh/loader/package/tests/test_loader.py
@@ -301,7 +301,7 @@ def test_load_extids() -> None:
     date = datetime.datetime.now(tz=datetime.timezone.utc)
     storage.origin_add([Origin(url=origin)])
     storage.origin_visit_add(
-        [OriginVisit(origin=origin, visit=1, date=date, type="tar")]
+        [OriginVisit(origin=origin, visit=1, date=date, type="stub")]
     )
     storage.origin_visit_status_add(
         [
@@ -466,7 +466,7 @@ def test_load_upgrade_from_revision_extids(caplog):
     date = datetime.datetime.now(tz=datetime.timezone.utc)
     storage.origin_add([Origin(url=origin)])
     storage.origin_visit_add(
-        [OriginVisit(origin=origin, visit=1, date=date, type="tar")]
+        [OriginVisit(origin=origin, visit=1, date=date, type="stub")]
     )
     storage.origin_visit_status_add(
         [
-- 
GitLab