From b773bc11e8fbbc4ab4355121399be1080ddee1a2 Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Mon, 7 Oct 2024 16:46:29 +0200 Subject: [PATCH] package: Harmonize the way package versions are sorted Instead of implementing the versions sorting in each package loader prefer to have a base implementation in swh.loader.package.PackageLoader class through the get_sorted_versions method. It relies on the looseversion module enabling to interact with heterogeneous version schemes which works pretty well with a large majority of package loaders. The get_default_version method of the PackageLoader class now also has a base implementation returning the last element from the list returned by the get_sorted_versions method. As a consequence, each snapshot produced by a package loader contains a HEAD alias branch targeting the branch for the highest version number of a package. Both methods can be reimplemented in package loaders for special cases like debian for instance. Also remove the use of the packaging module to parse versions as it is only dedicated to parse Python package versions. Related to swh/devel/swh-lister#4711. --- requirements.txt | 1 - swh/loader/package/arch/loader.py | 18 +------- swh/loader/package/arch/tests/test_arch.py | 4 +- swh/loader/package/archive/loader.py | 4 -- swh/loader/package/aur/loader.py | 17 +------ swh/loader/package/aur/tests/test_aur.py | 4 +- swh/loader/package/bioconductor/loader.py | 11 +---- swh/loader/package/conda/loader.py | 21 +-------- swh/loader/package/conda/tests/test_conda.py | 4 +- swh/loader/package/cpan/loader.py | 17 +------ swh/loader/package/cpan/tests/test_cpan.py | 4 +- swh/loader/package/cran/loader.py | 8 +--- swh/loader/package/crates/loader.py | 17 +------ .../package/crates/tests/test_crates.py | 4 +- swh/loader/package/debian/loader.py | 9 +++- .../package/debian/tests/test_debian.py | 22 ++++++--- swh/loader/package/deposit/loader.py | 8 ---- swh/loader/package/hackage/loader.py | 17 +------ .../package/hackage/tests/test_hackage.py | 4 +- swh/loader/package/hex/loader.py | 6 +-- swh/loader/package/loader.py | 42 ++++++++++++----- swh/loader/package/maven/loader.py | 4 -- swh/loader/package/npm/loader.py | 2 +- swh/loader/package/opam/loader.py | 4 -- swh/loader/package/pubdev/loader.py | 20 +-------- .../package/pubdev/tests/test_pubdev.py | 10 ++--- swh/loader/package/puppet/loader.py | 17 +------ .../package/puppet/tests/test_puppet.py | 4 +- swh/loader/package/pypi/loader.py | 2 +- swh/loader/package/rpm/loader.py | 4 -- swh/loader/package/rubygems/loader.py | 9 +--- .../package/rubygems/tests/test_rubygems.py | 4 +- swh/loader/package/tests/test_loader.py | 45 +++++++++++++------ .../package/tests/test_loader_metadata.py | 2 +- 34 files changed, 124 insertions(+), 245 deletions(-) diff --git a/requirements.txt b/requirements.txt index fe9cde2b..ecb919fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,3 @@ python-dateutil tenacity >= 8.4.2 typing-extensions toml -packaging diff --git a/swh/loader/package/arch/loader.py b/swh/loader/package/arch/loader.py index 7c1477aa..24883cf1 100644 --- a/swh/loader/package/arch/loader.py +++ b/swh/loader/package/arch/loader.py @@ -3,7 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from distutils.version import LooseVersion + from pathlib import Path import re from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple @@ -79,21 +79,7 @@ class ArchLoader(PackageLoader[ArchPackageInfo]): ["0.1.1", "0.10.2"] """ - versions = list(self.artifacts.keys()) - versions.sort(key=LooseVersion) - return versions - - def get_default_version(self) -> str: - """Get the newest release version of an Arch Linux package - - Returns: - A string representing a version - - Example:: - - "0.1.2" - """ - return self.get_versions()[-1] + return list(self.artifacts) def get_package_info(self, version: str) -> Iterator[Tuple[str, ArchPackageInfo]]: """Get release name and package information from version. diff --git a/swh/loader/package/arch/tests/test_arch.py b/swh/loader/package/arch/tests/test_arch.py index 51946e79..096377e4 100644 --- a/swh/loader/package/arch/tests/test_arch.py +++ b/swh/loader/package/arch/tests/test_arch.py @@ -92,7 +92,7 @@ EXPECTED_PACKAGES = [ ] -def test_get_versions(swh_storage): +def test_get_sorted_versions(swh_storage): loader = ArchLoader( swh_storage, url=EXPECTED_PACKAGES[0]["url"], @@ -100,7 +100,7 @@ def test_get_versions(swh_storage): arch_metadata=EXPECTED_PACKAGES[0]["arch_metadata"], ) - assert loader.get_versions() == [ + assert loader.get_sorted_versions() == [ "1:1.3_20190211-1", "1:1.3_20220414-1", ] diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py index 60c4d4ab..12c81ccd 100644 --- a/swh/loader/package/archive/loader.py +++ b/swh/loader/package/archive/loader.py @@ -121,10 +121,6 @@ class ArchiveLoader(PackageLoader[ArchivePackageInfo]): versions.append(v) return versions - def get_default_version(self) -> str: - # It's the most recent, so for this loader, it's the last one - return self.artifacts[-1]["version"] - def get_package_info( self, version: str ) -> Iterator[Tuple[str, ArchivePackageInfo]]: diff --git a/swh/loader/package/aur/loader.py b/swh/loader/package/aur/loader.py index 66f36ffc..8da941f5 100644 --- a/swh/loader/package/aur/loader.py +++ b/swh/loader/package/aur/loader.py @@ -3,7 +3,6 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from distutils.version import LooseVersion from pathlib import Path import re from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple @@ -89,21 +88,7 @@ class AurLoader(PackageLoader[AurPackageInfo]): ["0.1.1", "0.10.2"] """ - versions = list(self.artifacts) - versions.sort(key=LooseVersion) - return versions - - def get_default_version(self) -> str: - """Get the newest release version of an Aur package - - Returns: - A string representing a version - - Example:: - - "0.1.2" - """ - return self.get_versions()[-1] + return list(self.artifacts) def get_package_info(self, version: str) -> Iterator[Tuple[str, AurPackageInfo]]: """Get release name and package information from version diff --git a/swh/loader/package/aur/tests/test_aur.py b/swh/loader/package/aur/tests/test_aur.py index c672a80d..3e64ccbe 100644 --- a/swh/loader/package/aur/tests/test_aur.py +++ b/swh/loader/package/aur/tests/test_aur.py @@ -110,7 +110,7 @@ EXPECTED_PACKAGES = [ ] -def test_get_versions(swh_storage): +def test_get_sorted_versions(swh_storage): loader = AurLoader( swh_storage, url=EXPECTED_PACKAGES[0]["url"], @@ -118,7 +118,7 @@ def test_get_versions(swh_storage): aur_metadata=EXPECTED_PACKAGES[0]["aur_metadata"], ) - assert loader.get_versions() == [ + assert loader.get_sorted_versions() == [ "10.5.2-1", ] diff --git a/swh/loader/package/bioconductor/loader.py b/swh/loader/package/bioconductor/loader.py index 2d388b65..8cdf79ac 100644 --- a/swh/loader/package/bioconductor/loader.py +++ b/swh/loader/package/bioconductor/loader.py @@ -11,7 +11,6 @@ import string from typing import Any, Dict, Iterator, Optional, Sequence, Tuple import attr -from packaging.version import parse as parse_version from swh.loader.core.utils import cached_method, release_name from swh.loader.package.cran.loader import extract_intrinsic_metadata, parse_date @@ -95,15 +94,7 @@ class BioconductorLoader(PackageLoader[BioconductorPackageInfo]): @cached_method def get_versions(self) -> Sequence[str]: """Sort package versions in ascending order and return them.""" - return list( - sorted( - self.packages, key=lambda p: parse_version(self.packages[p]["version"]) - ) - ) - - def get_default_version(self) -> str: - """Get the latest release version of a bioconductor package""" - return self.get_versions()[-1] + return list(self.packages) def get_package_info( self, version: str diff --git a/swh/loader/package/conda/loader.py b/swh/loader/package/conda/loader.py index a99bd169..f0793071 100644 --- a/swh/loader/package/conda/loader.py +++ b/swh/loader/package/conda/loader.py @@ -10,7 +10,6 @@ from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple import attr import iso8601 -from packaging.version import parse as parse_version import yaml from swh.loader.core.utils import EMPTY_AUTHOR, Person, get_url_body, release_name @@ -103,25 +102,7 @@ class CondaLoader(PackageLoader[CondaPackageInfo]): ["0.1.1", "0.10.2"] """ - versions = list(self.artifacts.keys()) - versions.sort( - key=lambda version_key: parse_version( - version_key.split("/", 1)[1].split("-", 1)[0] - ) - ) - return versions - - def get_default_version(self) -> str: - """Get the newest release version of a Conda package - - Returns: - A string representing a version - - Example:: - - "0.10.2" - """ - return self.get_versions()[-1] + return list(self.artifacts) def get_package_info(self, version: str) -> Iterator[Tuple[str, CondaPackageInfo]]: """Get release name and package information from version diff --git a/swh/loader/package/conda/tests/test_conda.py b/swh/loader/package/conda/tests/test_conda.py index 0e16a51d..f32bb961 100644 --- a/swh/loader/package/conda/tests/test_conda.py +++ b/swh/loader/package/conda/tests/test_conda.py @@ -45,11 +45,11 @@ ORIGINS = [ ] -def test_get_versions(requests_mock_datadir, swh_storage): +def test_get_sorted_versions(requests_mock_datadir, swh_storage): loader = CondaLoader( swh_storage, url=ORIGINS[0]["url"], artifacts=ORIGINS[0]["artifacts"] ) - assert loader.get_versions() == [ + assert loader.get_sorted_versions() == [ "linux-64/0.11.1-py36h9f0ad1d_1", "linux-64/0.11.1-py36hc560c46_1", ] diff --git a/swh/loader/package/cpan/loader.py b/swh/loader/package/cpan/loader.py index f23b5cc2..98d4a142 100644 --- a/swh/loader/package/cpan/loader.py +++ b/swh/loader/package/cpan/loader.py @@ -10,7 +10,6 @@ from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple import attr import iso8601 -from packaging.version import parse as parse_version from requests import HTTPError from swh.loader.core.utils import EMPTY_AUTHOR, Person, get_url_body, release_name @@ -98,21 +97,7 @@ class CpanLoader(PackageLoader[CpanPackageInfo]): ["0.1.1", "0.10.2"] """ - versions = list(self.artifacts.keys()) - versions.sort(key=parse_version) - return versions - - def get_default_version(self) -> str: - """Get the newest release version of a Perl package - - Returns: - A string representing a version - - Example:: - - "0.10.2" - """ - return self.get_versions()[-1] + return list(self.artifacts) def get_package_info(self, version: str) -> Iterator[Tuple[str, CpanPackageInfo]]: """Get release name and package information from version diff --git a/swh/loader/package/cpan/tests/test_cpan.py b/swh/loader/package/cpan/tests/test_cpan.py index 5d142b5f..0ca64a3b 100644 --- a/swh/loader/package/cpan/tests/test_cpan.py +++ b/swh/loader/package/cpan/tests/test_cpan.py @@ -99,8 +99,8 @@ def cpan_loader(requests_mock_datadir, swh_storage): ) -def test_get_versions(cpan_loader): - assert cpan_loader.get_versions() == ["0.01", "0.05"] +def test_get_sorted_versions(cpan_loader): + assert cpan_loader.get_sorted_versions() == ["0.01", "0.05"] def test_get_default_version(cpan_loader): diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py index 76efd6d9..d1e0b345 100644 --- a/swh/loader/package/cran/loader.py +++ b/swh/loader/package/cran/loader.py @@ -66,13 +66,7 @@ class CRANLoader(PackageLoader[CRANPackageInfo]): self.artifacts = artifacts def get_versions(self) -> List[str]: - versions = [] - for artifact in self.artifacts: - versions.append(artifact["version"]) - return versions - - def get_default_version(self) -> str: - return self.artifacts[-1]["version"] + return [artifact["version"] for artifact in self.artifacts] def get_package_info(self, version: str) -> Iterator[Tuple[str, CRANPackageInfo]]: for a_metadata in self.artifacts: diff --git a/swh/loader/package/crates/loader.py b/swh/loader/package/crates/loader.py index 935db8a3..7188c682 100644 --- a/swh/loader/package/crates/loader.py +++ b/swh/loader/package/crates/loader.py @@ -12,7 +12,6 @@ from urllib.parse import urlparse import attr import iso8601 -from looseversion import LooseVersion2 import toml from swh.loader.core.utils import ( @@ -142,21 +141,7 @@ class CratesLoader(PackageLoader[CratesPackageInfo]): ["0.1.1", "0.10.2"] """ - versions = list(self.artifacts.keys()) - versions.sort(key=LooseVersion2) - return versions - - def get_default_version(self) -> str: - """Get the newest release version of a crate - - Returns: - A string representing a version - - Example:: - - "0.1.2" - """ - return self.get_versions()[-1] + return list(self.artifacts) def get_metadata_authority(self): p_url = urlparse(self.url) diff --git a/swh/loader/package/crates/tests/test_crates.py b/swh/loader/package/crates/tests/test_crates.py index 92a48abd..54f74f9c 100644 --- a/swh/loader/package/crates/tests/test_crates.py +++ b/swh/loader/package/crates/tests/test_crates.py @@ -36,13 +36,13 @@ def expected(datadir): return json.loads(fp.read_bytes()) -def test_get_versions(requests_mock_datadir, swh_storage, expected): +def test_get_sorted_versions(requests_mock_datadir, swh_storage, expected): loader = CratesLoader( swh_storage, url=expected[1]["url"], artifacts=expected[1]["artifacts"], ) - assert loader.get_versions() == [ + assert loader.get_sorted_versions() == [ "0.1.0", "0.1.1", "0.1.2", diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py index 2f715380..fb404b7a 100644 --- a/swh/loader/package/debian/loader.py +++ b/swh/loader/package/debian/loader.py @@ -13,6 +13,7 @@ import attr from dateutil.parser import parse as parse_date from debian.changelog import Changelog from debian.deb822 import Dsc +from looseversion import LooseVersion2 from swh.loader.core.utils import download, release_name from swh.loader.package.loader import BasePackageInfo, PackageLoader, PartialExtID @@ -182,12 +183,16 @@ class DebianLoader(PackageLoader[DebianPackageInfo]): super().__init__(storage=storage, url=url, **kwargs) self.packages = packages - def get_versions(self) -> Sequence[str]: + def get_sorted_versions(self) -> Sequence[str]: """Returns the keys of the packages input (e.g. stretch/contrib/0.7.2-3, etc...) """ - return list(self.packages.keys()) + return list( + sorted( + self.packages, key=lambda p: LooseVersion2(self.packages[p]["version"]) + ) + ) def get_package_info(self, version: str) -> Iterator[Tuple[str, DebianPackageInfo]]: meta = self.packages[version] diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py index 5e370864..d051aa96 100644 --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -119,7 +119,7 @@ def test_debian_first_visit(swh_storage, requests_mock_datadir): ) actual_load_status = loader.load() - expected_snapshot_id = "f9e4d0d200433dc998ad2ca40ee1244785fe6ed1" + expected_snapshot_id = "9033c04b5f77a19e44031a61c175e9c2f760c75a" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -138,10 +138,14 @@ def test_debian_first_visit(swh_storage, requests_mock_datadir): expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), branches={ + b"HEAD": SnapshotBranch( + target_type=SnapshotTargetType.ALIAS, + target=b"releases/stretch/contrib/0.7.2-3", + ), b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( target_type=SnapshotTargetType.RELEASE, target=release_id, - ) + ), }, ) # different than the previous loader as no release is done @@ -195,7 +199,7 @@ def test_debian_first_visit_then_another_visit(swh_storage, requests_mock_datadi actual_load_status = loader.load() - expected_snapshot_id = "f9e4d0d200433dc998ad2ca40ee1244785fe6ed1" + expected_snapshot_id = "9033c04b5f77a19e44031a61c175e9c2f760c75a" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -212,10 +216,14 @@ def test_debian_first_visit_then_another_visit(swh_storage, requests_mock_datadi expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), branches={ + b"HEAD": SnapshotBranch( + target_type=SnapshotTargetType.ALIAS, + target=b"releases/stretch/contrib/0.7.2-3", + ), b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( target_type=SnapshotTargetType.RELEASE, target=hash_to_bytes("de96ae3d3e136f5c1709117059e2a2c05b8ee5ae"), - ) + ), }, ) # different than the previous loader as no release is done @@ -496,7 +504,7 @@ def _check_debian_loading(swh_storage, packages): ) actual_load_status = loader.load() - expected_snapshot_id = "474c0e3d5796d15363031c333533527d659c559e" + expected_snapshot_id = "d0d31657ce1bb5f042c714925be9556ce69a5a15" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -513,6 +521,10 @@ def _check_debian_loading(swh_storage, packages): expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), branches={ + b"HEAD": SnapshotBranch( + target_type=SnapshotTargetType.ALIAS, + target=b"releases/buster/contrib/0.7.2-4", + ), b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( target_type=SnapshotTargetType.RELEASE, target=hash_to_bytes("de96ae3d3e136f5c1709117059e2a2c05b8ee5ae"), diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py index 0867fa02..ab4841c8 100644 --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -177,14 +177,6 @@ class DepositLoader(PackageLoader[DepositPackageInfo]): r["software_version"] for r in self.client.releases_get(self.deposit_id) ] - def get_default_version(self) -> str: - """The default version is the latest release. - - Returns: - A version number - """ - return self.get_versions()[-1] - def generate_branch_name(self, version: str) -> str: """Generate a unique branch name from a version number. diff --git a/swh/loader/package/hackage/loader.py b/swh/loader/package/hackage/loader.py index e9118acb..02a6c15f 100644 --- a/swh/loader/package/hackage/loader.py +++ b/swh/loader/package/hackage/loader.py @@ -11,7 +11,6 @@ import re from typing import Any, Dict, Iterator, Optional, Sequence, Tuple import attr -from packaging.version import parse as parse_version import requests from requests.structures import CaseInsensitiveDict @@ -123,21 +122,7 @@ class HackageLoader(PackageLoader[HackagePackageInfo]): ["0.1.1", "0.10.2"] """ - versions = list(self.info_versions().keys()) - versions.sort(key=parse_version) - return versions - - def get_default_version(self) -> str: - """Get the newest release version of an Haskell package - - Returns: - A string representing a version - - Example:: - - "0.10.2" - """ - return self.get_versions()[-1] + return list(self.info_versions()) def get_package_info( self, version: str diff --git a/swh/loader/package/hackage/tests/test_hackage.py b/swh/loader/package/hackage/tests/test_hackage.py index 94d74d72..ba5259cc 100644 --- a/swh/loader/package/hackage/tests/test_hackage.py +++ b/swh/loader/package/hackage/tests/test_hackage.py @@ -86,12 +86,12 @@ def head_callback(request, context, datadir): return None -def test_get_versions(requests_mock_datadir, swh_storage): +def test_get_sorted_versions(requests_mock_datadir, swh_storage): loader = HackageLoader( swh_storage, url=EXPECTED_PACKAGES[1]["url"], ) - assert loader.get_versions() == [ + assert loader.get_sorted_versions() == [ "0.1", "0.3.0.2", ] diff --git a/swh/loader/package/hex/loader.py b/swh/loader/package/hex/loader.py index 3743a75e..82a75f52 100644 --- a/swh/loader/package/hex/loader.py +++ b/swh/loader/package/hex/loader.py @@ -13,7 +13,6 @@ from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple from urllib.parse import urlparse import attr -from packaging.version import parse as parse_version from swh.loader.core.utils import EMPTY_AUTHOR, get_url_body, release_name from swh.loader.package.loader import ( @@ -83,10 +82,7 @@ class HexLoader(PackageLoader[HexPackageInfo]): self.releases = releases def get_versions(self) -> Sequence[str]: - return list(sorted(self.releases, key=parse_version)) - - def get_default_version(self) -> str: - return self.get_versions()[-1] + return list(self.releases) def get_metadata_authority(self): parsed_url = urlparse(self.url) diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py index 33ad8c07..f5a58101 100644 --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -27,6 +27,7 @@ from typing import ( ) import attr +from looseversion import LooseVersion2 from requests.exceptions import ContentDecodingError import sentry_sdk @@ -182,6 +183,8 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): def get_versions(self) -> Sequence[str]: """Return the list of all published package versions. + There is no need to sort that list, it is handled by :meth:`get_sorted_versions`. + Raises: class:`swh.loader.exception.NotFound` error when failing to read the published package versions. @@ -192,6 +195,34 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): """ return [] + def get_sorted_versions(self) -> Sequence[str]: + """Return the list of all published package versions sorted by version numbers. + + Default implementation uses the `looseversion <https://pypi.org/project/looseversion/>`_ + package enabling to interact with heterogeneous version schemes. + + Raises: + class:`swh.loader.exception.NotFound` error when failing to read the + published package versions. + + Returns: + Sequence of sorted published versions + + """ + return list(sorted(self.get_versions(), key=LooseVersion2)) + + def get_default_version(self) -> str: + """Retrieve the latest release version if any. + + Default implementation returns the last element from the list returned + by :meth:`get_sorted_versions`. + + Returns: + Latest version + + """ + return self.get_sorted_versions()[-1] + def get_package_info(self, version: str) -> Iterator[Tuple[str, TPackageInfo]]: """Given a release version of a package, retrieve the associated package information for such version. @@ -217,15 +248,6 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): """ raise NotImplementedError("build_release") - def get_default_version(self) -> str: - """Retrieve the latest release version if any. - - Returns: - Latest version - - """ - return "" - def last_snapshot(self) -> Optional[Snapshot]: """Retrieve the last snapshot out of the last visit.""" return snapshot_get_latest( @@ -605,7 +627,7 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): # Get the list of all version names try: - versions = self.get_versions() + versions = self.get_sorted_versions() except NotFound as e: return self.finalize_visit( snapshot=snapshot, diff --git a/swh/loader/package/maven/loader.py b/swh/loader/package/maven/loader.py index 0872c3e2..be5582aa 100644 --- a/swh/loader/package/maven/loader.py +++ b/swh/loader/package/maven/loader.py @@ -158,10 +158,6 @@ class MavenLoader(PackageLoader[MavenPackageInfo]): def get_versions(self) -> Sequence[str]: return list(self.version_artifact) - def get_default_version(self) -> str: - # Default version is the one of the most recent artifact - return max(self.artifacts, key=lambda a: a["time"])["version"] - def get_metadata_authority(self): return MetadataAuthority(type=MetadataAuthorityType.FORGE, url=self.base_url) diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py index 31754aa1..33c7e732 100644 --- a/swh/loader/package/npm/loader.py +++ b/swh/loader/package/npm/loader.py @@ -125,7 +125,7 @@ class NpmLoader(PackageLoader[NpmPackageInfo]): return json.loads(self._raw_info()) def get_versions(self) -> Sequence[str]: - return sorted(list(self.info()["versions"].keys())) + return self.info()["versions"] def get_default_version(self) -> str: return self.info()["dist-tags"].get("latest", "") diff --git a/swh/loader/package/opam/loader.py b/swh/loader/package/opam/loader.py index c6c95472..acc64897 100644 --- a/swh/loader/package/opam/loader.py +++ b/swh/loader/package/opam/loader.py @@ -174,10 +174,6 @@ class OpamLoader(PackageLoader[OpamPackageInfo]): self._opam_init() return self._compute_versions() - def get_default_version(self) -> str: - """Return the most recent version of the package as default.""" - return self._compute_versions()[-1] - def _opam_show_args(self, version: Optional[str]): package = self.get_package_name(version) if version else self.opam_package diff --git a/swh/loader/package/pubdev/loader.py b/swh/loader/package/pubdev/loader.py index ae65f5c7..5a18aa22 100644 --- a/swh/loader/package/pubdev/loader.py +++ b/swh/loader/package/pubdev/loader.py @@ -6,7 +6,6 @@ import json from typing import Dict, Iterator, Optional, Sequence, Tuple import attr -from packaging.version import parse as parse_version from swh.loader.core.utils import ( EMPTY_AUTHOR, @@ -73,24 +72,7 @@ class PubDevLoader(PackageLoader[PubDevPackageInfo]): ["0.1.1", "0.10.2"] """ - versions = list(self.info()["versions"].keys()) - versions.sort( - key=lambda version: parse_version(version.split("-", maxsplit=1)[0]) - ) - return versions - - def get_default_version(self) -> str: - """Get the newest release version of a PubDev package - - Returns: - A string representing a version - - Example:: - - "0.1.2" - """ - latest = self.info()["latest"] - return latest["version"] + return list(self.info()["versions"]) def get_package_info(self, version: str) -> Iterator[Tuple[str, PubDevPackageInfo]]: """Get release name and package information from version diff --git a/swh/loader/package/pubdev/tests/test_pubdev.py b/swh/loader/package/pubdev/tests/test_pubdev.py index 3c1a0aea..dac19739 100644 --- a/swh/loader/package/pubdev/tests/test_pubdev.py +++ b/swh/loader/package/pubdev/tests/test_pubdev.py @@ -44,12 +44,12 @@ EXPECTED_PACKAGES = [ ] -def test_get_versions(requests_mock_datadir, swh_storage): +def test_get_sorted_versions(requests_mock_datadir, swh_storage): loader = PubDevLoader( swh_storage, url=EXPECTED_PACKAGES[1]["url"], ) - assert loader.get_versions() == [ + assert loader.get_sorted_versions() == [ "1.0.0", "3.8.2", ] @@ -61,7 +61,7 @@ def test_sort_loose_versions(requests_mock_datadir, swh_storage): swh_storage, url=EXPECTED_PACKAGES[4]["url"], ) - assert loader.get_versions() == ["0.1.2+4", "0.1.2+5", "0.1.2+6"] + assert loader.get_sorted_versions() == ["0.1.2+4", "0.1.2+5", "0.1.2+6"] def test_sort_loose_versions_1(requests_mock_datadir, swh_storage): @@ -70,7 +70,7 @@ def test_sort_loose_versions_1(requests_mock_datadir, swh_storage): swh_storage, url=EXPECTED_PACKAGES[5]["url"], ) - assert loader.get_versions() == [ + assert loader.get_sorted_versions() == [ "0.0.1", "0.0.2", "0.1.1", @@ -79,9 +79,9 @@ def test_sort_loose_versions_1(requests_mock_datadir, swh_storage): "0.1.4", "0.1.5", "0.2.1", + "0.2.1+3", "0.2.1+hotfix.1", "0.2.1+hotfix.2", - "0.2.1+3", "0.3.1", "0.3.1+1", "0.5.1", diff --git a/swh/loader/package/puppet/loader.py b/swh/loader/package/puppet/loader.py index d5f67eb1..b271f9f2 100644 --- a/swh/loader/package/puppet/loader.py +++ b/swh/loader/package/puppet/loader.py @@ -10,7 +10,6 @@ from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple import attr import iso8601 -from packaging.version import parse as parse_version from swh.loader.core.utils import Person, release_name from swh.loader.package.loader import BasePackageInfo, PackageLoader @@ -77,21 +76,7 @@ class PuppetLoader(PackageLoader[PuppetPackageInfo]): ["0.1.1", "0.10.2"] """ - versions = list(self.artifacts.keys()) - versions.sort(key=parse_version) - return versions - - def get_default_version(self) -> str: - """Get the newest release version of a Puppet module - - Returns: - A string representing a version - - Example:: - - "0.10.2" - """ - return self.get_versions()[-1] + return list(self.artifacts) def get_package_info(self, version: str) -> Iterator[Tuple[str, PuppetPackageInfo]]: """Get release name and package information from version diff --git a/swh/loader/package/puppet/tests/test_puppet.py b/swh/loader/package/puppet/tests/test_puppet.py index f7467c57..d7e361f5 100644 --- a/swh/loader/package/puppet/tests/test_puppet.py +++ b/swh/loader/package/puppet/tests/test_puppet.py @@ -42,11 +42,11 @@ ORIGINS = { } -def test_get_versions(requests_mock_datadir, swh_storage): +def test_get_sorted_versions(requests_mock_datadir, swh_storage): loader = PuppetLoader( swh_storage, url=ORIGINS["url"], artifacts=ORIGINS["artifacts"] ) - assert loader.get_versions() == ["1.0.0", "8.1.0"] + assert loader.get_sorted_versions() == ["1.0.0", "8.1.0"] def test_get_default_version(requests_mock_datadir, swh_storage): diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py index 746af5ff..69d87553 100644 --- a/swh/loader/package/pypi/loader.py +++ b/swh/loader/package/pypi/loader.py @@ -99,7 +99,7 @@ class PyPILoader(PackageLoader[PyPIPackageInfo]): return json.loads(self._raw_info()) def get_versions(self) -> Sequence[str]: - return self.info()["releases"].keys() + return list(self.info()["releases"]) def get_default_version(self) -> str: return self.info()["info"]["version"] diff --git a/swh/loader/package/rpm/loader.py b/swh/loader/package/rpm/loader.py index 9e69648d..6e292f0b 100644 --- a/swh/loader/package/rpm/loader.py +++ b/swh/loader/package/rpm/loader.py @@ -96,10 +96,6 @@ class RpmLoader(PackageLoader[RpmPackageInfo]): """Returns the package versions sorted by build time""" return list(sorted(self.packages, key=lambda p: self.packages[p]["build_time"])) - def get_default_version(self) -> str: - """Get the latest release version of a rpm package""" - return self.get_versions()[-1] - def get_package_info(self, version: str) -> Iterator[Tuple[str, RpmPackageInfo]]: yield ( release_name(version), diff --git a/swh/loader/package/rubygems/loader.py b/swh/loader/package/rubygems/loader.py index 467c90fb..ca00911d 100644 --- a/swh/loader/package/rubygems/loader.py +++ b/swh/loader/package/rubygems/loader.py @@ -9,7 +9,6 @@ import string from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple import attr -from packaging.version import parse as parse_version from swh.loader.core.utils import get_url_body, release_name from swh.loader.package.loader import ( @@ -85,13 +84,7 @@ class RubyGemsLoader(PackageLoader[RubyGemsPackageInfo]): def get_versions(self) -> Sequence[str]: """Return all versions sorted for the gem being loaded""" - versions = list(self.artifacts.keys()) - versions.sort(key=parse_version) - return versions - - def get_default_version(self) -> str: - """Get the newest release version of a gem""" - return self.get_versions()[-1] + return list(self.artifacts) def get_metadata_authority(self): return MetadataAuthority( diff --git a/swh/loader/package/rubygems/tests/test_rubygems.py b/swh/loader/package/rubygems/tests/test_rubygems.py index 984e53b0..d2ad424c 100644 --- a/swh/loader/package/rubygems/tests/test_rubygems.py +++ b/swh/loader/package/rubygems/tests/test_rubygems.py @@ -76,14 +76,14 @@ def head_release_extrinsic_metadata(datadir): ).read_bytes() -def test_get_versions(requests_mock_datadir, swh_storage): +def test_get_sorted_versions(requests_mock_datadir, swh_storage): loader = RubyGemsLoader( swh_storage, url=ORIGIN["url"], artifacts=ORIGIN["artifacts"], rubygem_metadata=ORIGIN["rubygem_metadata"], ) - assert loader.get_versions() == ["0.0.1", "0.0.2"] + assert loader.get_sorted_versions() == ["0.0.1", "0.0.2"] def test_get_default_version(requests_mock_datadir, swh_storage): diff --git a/swh/loader/package/tests/test_loader.py b/swh/loader/package/tests/test_loader.py index 1e520b55..13d230cb 100644 --- a/swh/loader/package/tests/test_loader.py +++ b/swh/loader/package/tests/test_loader.py @@ -114,16 +114,19 @@ def test_loader_origin_visit_success(swh_storage, requests_mock_datadir): loader = StubPackageLoader(swh_storage, ORIGIN_URL) assert loader.load() == { - "snapshot_id": "dcb9ecef64af73f2cdac7f5463cb6dece6b1db61", + "snapshot_id": "ecf2e51174b5d754c76f0c9e42b8d0440f380a16", "status": "eventful", } assert loader.load_status() == {"status": "eventful"} assert loader.visit_status() == "full" - assert set(loader.last_snapshot().branches.keys()) == { + snapshot_branches = { f"branch-{version}".encode() for version in loader.get_versions() } + snapshot_branches.add(b"HEAD") + + assert set(loader.last_snapshot().branches.keys()) == snapshot_branches def test_loader_origin_visit_failure(swh_storage): @@ -295,6 +298,9 @@ def test_load_extids() -> None: b"v3.0": SnapshotBranch( target_type=SnapshotTargetType.RELEASE, target=rel3_swhid.object_id ), + b"HEAD": SnapshotBranch( + target_type=SnapshotTargetType.ALIAS, target=b"v3.0" + ), } ) storage.snapshot_add([last_snapshot]) @@ -373,6 +379,9 @@ def test_load_extids() -> None: b"branch-v4.0": SnapshotBranch( target_type=SnapshotTargetType.RELEASE, target=rel4_swhid.object_id ), + b"HEAD": SnapshotBranch( + target_type=SnapshotTargetType.ALIAS, target=b"branch-v4.0" + ), } ) assert snapshot_get_latest(storage, origin) == snapshot @@ -460,6 +469,9 @@ def test_load_upgrade_from_revision_extids(caplog): b"v2.0": SnapshotBranch( target_type=SnapshotTargetType.REVISION, target=rev2_swhid.object_id ), + b"HEAD": SnapshotBranch( + target_type=SnapshotTargetType.ALIAS, target=b"v2.0" + ), } ) storage.snapshot_add([last_snapshot]) @@ -536,6 +548,9 @@ def test_load_upgrade_from_revision_extids(caplog): b"branch-v3.0": SnapshotBranch( target_type=SnapshotTargetType.RELEASE, target=rel2_swhid.object_id ), + b"HEAD": SnapshotBranch( + target_type=SnapshotTargetType.ALIAS, target=b"branch-v3.0" + ), } ) assert snapshot_get_latest(storage, origin) == snapshot @@ -627,16 +642,17 @@ def test_loader_origin_with_package_info_failure(swh_storage, requests_mock_data loader = StubPackageLoaderWithPackageInfoFailure(swh_storage, ORIGIN_URL) assert loader.load() == { - "snapshot_id": "b4cce7081d661fb7f4d7a1db96e8044b752eb0b0", + "snapshot_id": "24e471673dea04310c5e276b9acb6b0a03fa8642", "status": "eventful", } assert loader.load_status() == {"status": "eventful"} assert loader.visit_status() == "partial" - assert set(loader.last_snapshot().branches.keys()) == { - f"branch-v{i}.0".encode() for i in (1, 3, 4) - } + snapshot_branches = {f"branch-v{i}.0".encode() for i in (1, 3, 4)} + snapshot_branches.add(b"HEAD") + + assert set(loader.last_snapshot().branches.keys()) == snapshot_branches def test_loader_with_dangling_branch_in_last_snapshot( @@ -645,7 +661,7 @@ def test_loader_with_dangling_branch_in_last_snapshot( loader = StubPackageLoader(swh_storage, ORIGIN_URL) assert loader.load() == { - "snapshot_id": "dcb9ecef64af73f2cdac7f5463cb6dece6b1db61", + "snapshot_id": "ecf2e51174b5d754c76f0c9e42b8d0440f380a16", "status": "eventful", } @@ -660,7 +676,7 @@ def test_loader_with_dangling_branch_in_last_snapshot( loader = StubPackageLoaderWithDanglingBranchInLastSnapshot(swh_storage, ORIGIN_URL) assert loader.load() == { - "snapshot_id": "dcb9ecef64af73f2cdac7f5463cb6dece6b1db61", + "snapshot_id": "ecf2e51174b5d754c76f0c9e42b8d0440f380a16", "status": "eventful", } @@ -705,21 +721,22 @@ def test_loader_with_duplicated_releases(swh_storage, requests_mock_datadir, moc assert loader.load() == { "status": "eventful", - "snapshot_id": "a01fb6280bedbfc3609d3b6ae06e70bf2b4186fa", + "snapshot_id": "ee040891f6d714a9b07090bf5804f7376a0438ee", } # versions v{i}.0 and v{i} target the same release so load_release # should have been called once per unique release assert len(load_release.mock_calls) == len(loader.get_versions()) / 2 - snasphot = loader.last_snapshot() + snapshot = loader.last_snapshot() - # all referenced versions should be found in the snapshot - assert len(snasphot.branches) == len(loader.get_versions()) + # all referenced versions should be found in the snapshot plus a HEAD alias branch + assert len(snapshot.branches) == len(loader.get_versions()) + 1 # check branch-v{i}.0 and branch-v{i} target the same release for i in range(1, 5): assert ( - snasphot.branches[f"branch-v{i}.0".encode()] - == snasphot.branches[f"branch-v{i}".encode()] + snapshot.branches[f"branch-v{i}.0".encode()] + == snapshot.branches[f"branch-v{i}".encode()] ) + assert b"HEAD" in snapshot.branches diff --git a/swh/loader/package/tests/test_loader_metadata.py b/swh/loader/package/tests/test_loader_metadata.py index 6e51e1f5..da6e04d6 100644 --- a/swh/loader/package/tests/test_loader_metadata.py +++ b/swh/loader/package/tests/test_loader_metadata.py @@ -29,7 +29,7 @@ from swh.model.model import ( from swh.model.swhids import CoreSWHID, ExtendedSWHID EMPTY_SNAPSHOT_ID = "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e" -FULL_SNAPSHOT_ID = "4ac5730a9393f5099b63a35a17b6c33d36d70c3a" +FULL_SNAPSHOT_ID = "e5d828e36866ad36e06db53bfb41b02c6150b6f7" AUTHORITY = MetadataAuthority( type=MetadataAuthorityType.FORGE, -- GitLab