Skip to content
Snippets Groups Projects
Commit 571d69f9 authored by Kumar Shivendu, committed by Kumar Shivendu
Browse files

fix(hex): Use page_size for stopping condition

parent 6d228a81
No related branches found
No related tags found
No related merge requests found
......@@ -47,6 +47,7 @@ class HexLister(Lister[HexListerState, HexListerPage]):
self,
scheduler: SchedulerInterface,
instance: str = "hex",
page_size: int = 100,
credentials: CredentialsType = None,
):
super().__init__(
......@@ -56,6 +57,7 @@ class HexLister(Lister[HexListerState, HexListerPage]):
instance=instance,
)
# TODO: Add authentication support
self.page_size = page_size
self.session.headers.update({"Accept": "application/json"})
......@@ -73,12 +75,13 @@ class HexLister(Lister[HexListerState, HexListerPage]):
url,
params={
"search": f"updated_after:{self.state.page_updated_at}",
# We expect 100 packages per page. The API doesn't allow us to change that.
},
).json()
yield body
if len(body) == 0:
if len(body) < self.page_size: # Always 100 when running on the real API
break
def get_origins_from_page(self, page: HexListerPage) -> Iterator[ListedOrigin]:
......
......@@ -59,14 +59,14 @@ def test_full_lister_hex(
mock_hexpm_page("2019-03-27T00:32:47.822901Z", p3_json)
mock_hexpm_page("2022-09-09T21:00:14.993273Z", [])
lister = HexLister(swh_scheduler)
lister = HexLister(swh_scheduler, page_size=4)
stats = lister.run()
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
lister_state = lister.get_state_from_scheduler()
assert stats.pages == 4
assert stats.origins == 10 # 4 + 4 + 2 + 0
assert stats.pages == 3 # 4 + 4 + 2 (2 < page_size so lister stops at page 3)
assert stats.origins == 10
check_listed_origins(
p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
......@@ -81,7 +81,7 @@ def test_hex_incremental_lister(
mock_hexpm_page,
hexpm_page,
):
lister = HexLister(swh_scheduler)
lister = HexLister(swh_scheduler, page_size=4)
# First run: P1 and P2 return 4 origins each and P3 returns 0
p1_origin_urls, p1_json = hexpm_page(1)
......@@ -114,7 +114,7 @@ def test_hex_incremental_lister(
stats = lister.run()
assert stats.pages == 2
assert stats.pages == 1
assert stats.origins == 2
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
......@@ -149,7 +149,7 @@ def test_hex_incremental_lister(
@pytest.mark.parametrize("http_code", [400, 500])
def test_hex_lister_http_error(swh_scheduler, http_code, mock_hexpm_page, hexpm_page):
"""Test handling of some HTTP errors commonly encountered"""
lister = HexLister(swh_scheduler)
lister = HexLister(swh_scheduler, page_size=4)
p1_origin_urls, p1_json = hexpm_page(1)
_, p3_json = hexpm_page(3)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment