Skip to content
Snippets Groups Projects
Commit 4f6b3f3f authored by Antoine Lambert
Browse files

conda: Yield listed origins after all artifacts in a page are processed

swh-scheduler will deduplicate listed origins according to their URL
and visit type but not according to their extra loader arguments.

Previously, listed origins were yielded after each processed artifact
in a page, so some package version info could be lost to the
deduplication process.

So ensure that listed origins are yielded only once all artifacts in a
page have been processed.
parent 31eb5f63
No related branches found
No related tags found
No related merge requests found
...@@ -71,7 +71,9 @@ class CondaLister(StatelessLister[CondaListerPage]): ...@@ -71,7 +71,9 @@ class CondaLister(StatelessLister[CondaListerPage]):
assert self.lister_obj.id is not None assert self.lister_obj.id is not None
arch, packages = page arch, packages = page
package_names = set()
for filename, package_metadata in packages.items(): for filename, package_metadata in packages.items():
package_names.add(package_metadata["name"])
version_key = ( version_key = (
f"{arch}/{package_metadata['version']}-{package_metadata['build']}" f"{arch}/{package_metadata['version']}-{package_metadata['build']}"
) )
...@@ -102,22 +104,20 @@ class CondaLister(StatelessLister[CondaListerPage]): ...@@ -102,22 +104,20 @@ class CondaLister(StatelessLister[CondaListerPage]):
elif "date" in package_metadata: elif "date" in package_metadata:
package_date = iso8601.parse_date(package_metadata["date"]) package_date = iso8601.parse_date(package_metadata["date"])
last_update = None
if package_date: if package_date:
artifact["date"] = package_date.isoformat() artifact["date"] = package_date.isoformat()
self.package_dates[package_metadata["name"]].append(package_date) self.package_dates[package_metadata["name"]].append(package_date)
last_update = max(self.package_dates[package_metadata["name"]])
for package_name in package_names:
package_dates = self.package_dates[package_name]
yield ListedOrigin( yield ListedOrigin(
lister_id=self.lister_obj.id, lister_id=self.lister_obj.id,
visit_type=self.VISIT_TYPE, visit_type=self.VISIT_TYPE,
url=self.ORIGIN_URL_PATTERN.format( url=self.ORIGIN_URL_PATTERN.format(
channel=self.channel, pkgname=package_metadata["name"] channel=self.channel, pkgname=package_name
), ),
last_update=last_update, last_update=max(package_dates, default=None),
extra_loader_arguments={ extra_loader_arguments={
"artifacts": [ "artifacts": list(self.packages[package_name].values())
v for k, v in self.packages[package_metadata["name"]].items()
],
}, },
) )
...@@ -3,36 +3,14 @@ ...@@ -3,36 +3,14 @@
# License: GNU General Public License version 3, or any later version # License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information # See top-level LICENSE file for more information
from swh.lister.conda.lister import CondaLister import pytest
def test_conda_lister_free_channel(datadir, requests_mock_datadir, swh_scheduler):
lister = CondaLister(
scheduler=swh_scheduler, channel="free", archs=["linux-64", "osx-64", "win-64"]
)
res = lister.run()
assert res.pages == 3
assert res.origins == 11
def test_conda_lister_conda_forge_channel(
datadir, requests_mock_datadir, swh_scheduler
):
lister = CondaLister(
scheduler=swh_scheduler,
url="https://conda.anaconda.org",
channel="conda-forge",
archs=["linux-64"],
)
res = lister.run()
assert res.pages == 1 from swh.lister.conda.lister import CondaLister
assert res.origins == 2
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
expected_origins = [ @pytest.fixture
def expected_origins():
return [
{ {
"url": "https://anaconda.org/conda-forge/21cmfast", "url": "https://anaconda.org/conda-forge/21cmfast",
"artifacts": [ "artifacts": [
...@@ -75,6 +53,33 @@ def test_conda_lister_conda_forge_channel( ...@@ -75,6 +53,33 @@ def test_conda_lister_conda_forge_channel(
}, },
] ]
def test_conda_lister_free_channel(datadir, requests_mock_datadir, swh_scheduler):
lister = CondaLister(
scheduler=swh_scheduler, channel="free", archs=["linux-64", "osx-64", "win-64"]
)
res = lister.run()
assert res.pages == 3
assert res.origins == 11
def test_conda_lister_conda_forge_channel(
requests_mock_datadir, swh_scheduler, expected_origins
):
lister = CondaLister(
scheduler=swh_scheduler,
url="https://conda.anaconda.org",
channel="conda-forge",
archs=["linux-64"],
)
res = lister.run()
assert res.pages == 1
assert res.origins == 2
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
assert len(scheduler_origins) == len(expected_origins) assert len(scheduler_origins) == len(expected_origins)
assert [ assert [
...@@ -92,3 +97,23 @@ def test_conda_lister_conda_forge_channel( ...@@ -92,3 +97,23 @@ def test_conda_lister_conda_forge_channel(
) )
for expected in sorted(expected_origins, key=lambda expected: expected["url"]) for expected in sorted(expected_origins, key=lambda expected: expected["url"])
] ]
def test_conda_lister_number_of_yielded_origins(
requests_mock_datadir, swh_scheduler, expected_origins
):
"""Check that a single ListedOrigin instance is sent by expected origins."""
lister = CondaLister(
scheduler=swh_scheduler,
url="https://conda.anaconda.org",
channel="conda-forge",
archs=["linux-64"],
)
listed_origins = []
for page in lister.get_pages():
listed_origins += list(lister.get_origins_from_page(page))
assert sorted([listed_origin.url for listed_origin in listed_origins]) == sorted(
[origin["url"] for origin in expected_origins]
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment