Skip to content
Snippets Groups Projects
Commit c6aa490f authored by Antoine Lambert
Browse files

crates: Record lister state only if all crates were processed

Previously, the lister state was recorded regardless of whether errors
occurred when listing crates, as the finalize method is called even when an
exception is raised during listing.

As a consequence, some crates could be missed, since incremental listing
restarts from the dump date of the last processed crates database.

So ensure all crates have been processed by the lister before recording
its state.
parent aafaebd5
No related branches found
No related tags found
1 merge request: !532 "crates: Fixes and improvements"
......@@ -82,6 +82,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
enable_origins=enable_origins,
)
self.index_metadata: Dict[str, str] = {}
self.all_crates_processed = False
def state_from_dict(self, d: Dict[str, Any]) -> CratesListerState:
index_last_update = d.get("index_last_update")
......@@ -210,6 +211,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
page.append(self.page_entry_dict(v))
yield page
self.all_crates_processed = True
def get_origins_from_page(self, page: CratesListerPage) -> Iterator[ListedOrigin]:
"""Iterate on all crate pages and yield ListedOrigin instances."""
......@@ -255,8 +257,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
)
def finalize(self) -> None:
    """Record the crates index dump date as lister state after a full run.

    The state is only updated when no ``index_last_update`` is stored yet
    and ``all_crates_processed`` is set. Without the second condition, the
    dump date would be saved even though listing was interrupted by an
    error, and crates that were never reached could be missed by the next
    incremental listing.
    """
    # Parse the timestamp only inside the guard: nothing is computed (and
    # nothing can fail on missing index metadata) when listing was aborted.
    if not self.state.index_last_update and self.all_crates_processed:
        last = iso8601.parse_date(self.index_metadata["timestamp"])
        self.state.index_last_update = last
        self.updated = True
# Copyright (C) 2022 The Software Heritage developers
# Copyright (C) 2022-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import iso8601
import pytest
from swh.lister.crates.lister import CratesLister, CratesListerState
......@@ -250,3 +252,16 @@ def test_crates_lister_incremental_nothing_new(
assert res.pages == 0
assert res.origins == 0
def test_crates_lister_error_when_processing_crate(
    swh_scheduler, requests_mock_datadir, mocker
):
    """Lister state should not be recorded to scheduler if an error occurred
    when processing crate data."""
    lister = CratesLister(scheduler=swh_scheduler)

    # Force a failure while crate entries are being turned into page entries,
    # so the run is interrupted before all crates are processed.
    mocker.patch.object(lister, "page_entry_dict").side_effect = IndexError()

    with pytest.raises(IndexError):
        lister.run()

    # The interrupted run must not have saved a last-update date.
    assert lister.get_state_from_scheduler().index_last_update is None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment