diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py index e31756c53f926983fdf743de1e8740d0482cc739..0e41be71115c82d8a63edded94ae201f78e598d2 100644 --- a/swh/lister/crates/lister.py +++ b/swh/lister/crates/lister.py @@ -82,6 +82,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]): enable_origins=enable_origins, ) self.index_metadata: Dict[str, str] = {} + self.all_crates_processed = False def state_from_dict(self, d: Dict[str, Any]) -> CratesListerState: index_last_update = d.get("index_last_update") @@ -210,6 +211,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]): page.append(self.page_entry_dict(v)) yield page + self.all_crates_processed = True def get_origins_from_page(self, page: CratesListerPage) -> Iterator[ListedOrigin]: """Iterate on all crate pages and yield ListedOrigin instances.""" @@ -255,8 +257,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]): ) def finalize(self) -> None: - last: datetime = iso8601.parse_date(self.index_metadata["timestamp"]) - - if not self.state.index_last_update: + if not self.state.index_last_update and self.all_crates_processed: + last = iso8601.parse_date(self.index_metadata["timestamp"]) self.state.index_last_update = last self.updated = True diff --git a/swh/lister/crates/tests/test_lister.py b/swh/lister/crates/tests/test_lister.py index ebc9220c4ddd75f78f87853aad3874aac9657d36..82a41a6394479f7e6e16f69b9b329cc8f9d42838 100644 --- a/swh/lister/crates/tests/test_lister.py +++ b/swh/lister/crates/tests/test_lister.py @@ -1,8 +1,10 @@ -# Copyright (C) 2022 The Software Heritage developers +# Copyright (C) 2022-2024 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + import iso8601 +import pytest from swh.lister.crates.lister import CratesLister, CratesListerState @@ -250,3 +252,16 @@ 
def test_crates_lister_incremental_nothing_new( assert res.pages == 0 assert res.origins == 0 + + +def test_crates_lister_error_when_processing_crate( + swh_scheduler, requests_mock_datadir, mocker +): + """Lister state should not be recorded to scheduler if an error occurred + when processing crate data.""" + lister = CratesLister(scheduler=swh_scheduler) + mocker.patch.object(lister, "page_entry_dict").side_effect = IndexError() + with pytest.raises(IndexError): + lister.run() + + assert lister.get_state_from_scheduler().index_last_update is None