Skip to content
Snippets Groups Projects
Commit 6ad61aec authored by Kumar Shivendu's avatar Kumar Shivendu
Browse files

feat(fedora): Introduce fedora lister

Summary: Lister to ingest fedora mirrors (.rpm)

Reviewers: #reviewers, vlorentz

Subscribers: vlorentz, olasd

Maniphest Tasks: T4448

Differential Revision: https://forge.softwareheritage.org/D8386
parent ea146ce2
No related branches found
No related tags found
No related merge requests found
Showing
with 729 additions and 0 deletions
......@@ -26,6 +26,7 @@ following Python modules:
- `swh.lister.pypi`
- `swh.lister.tuleap`
- `swh.lister.gogs`
- `swh.lister.fedora`
Dependencies
------------
......
......@@ -48,3 +48,9 @@ ignore_missing_imports = True
[mypy-psycopg2.*]
ignore_missing_imports = True
[mypy-repomd.*]
ignore_missing_imports = True
[mypy-defusedxml.*]
ignore_missing_imports = True
......@@ -9,3 +9,4 @@ lxml
dulwich
testing.postgresql
psycopg2
repomd
......@@ -86,6 +86,7 @@ setup(
lister.tuleap=swh.lister.tuleap:register
lister.maven=swh.lister.maven:register
lister.gogs=swh.lister.gogs:register
lister.fedora=swh.lister.fedora:register
""",
classifiers=[
"Programming Language :: Python :: 3",
......
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def register():
    """Return the plugin description of the Fedora lister.

    The returned mapping exposes the lister class under ``"lister"`` and the
    dotted path of this package's tasks module under ``"task_modules"``.
    """
    # Local import: the lister module is only loaded when register() is called.
    from .lister import FedoraLister

    plugin = {"lister": FedoraLister}
    plugin["task_modules"] = [f"{__name__}.tasks"]
    return plugin
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from dataclasses import dataclass, field
from datetime import datetime, timezone
import logging
from typing import Any, Dict, Iterator, List, Set, Type
from urllib.error import HTTPError
from urllib.parse import urljoin
import repomd
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from ..pattern import Lister
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Aliases naming the kinds of values manipulated by the lister.
Release = int
Edition = str
PkgName = str
PkgVersion = str
FedoraOrigin = str
# NOTE(review): page_request() returns the object produced by repomd.load(),
# which is then iterated over, so pages look like repo instances rather than
# the Repo class itself -- confirm whether Type[...] is really intended here.
FedoraPageType = Type[repomd.Repo]
"""Each page is a list of packages from a given Fedora (release, edition) pair"""
def get_editions(release: Release) -> List[Edition]:
    """Return the edition directory names to list for a Fedora release.

    Releases before 20 use ``Everything`` and ``Fedora``; later releases use
    ``Everything``, ``Server`` and ``Workstation``, with ``Modular`` added
    from release 28 onwards.
    """
    # Ignore dirs that don't contain .rpm files:
    # Docker,CloudImages,Atomic*,Spins,Live,Cloud_Atomic,Silverblue
    if release < 20:
        return ["Everything", "Fedora"]

    editions = ["Everything", "Server", "Workstation"]
    if release >= 28:
        editions.append("Modular")
    return editions
def get_last_modified(pkg: repomd.Package) -> datetime:
    """Get timezone aware last modified time in UTC from RPM package metadata.

    The value is read from the ``build`` attribute of the package's
    ``common:time`` element and interpreted as a POSIX timestamp.
    """
    ts = pkg._element.find("common:time", namespaces=repomd._ns).get("build")
    # Build the aware datetime directly: datetime.utcfromtimestamp() is
    # deprecated since Python 3.12 and returns a naive object.
    return datetime.fromtimestamp(int(ts), tz=timezone.utc)
def get_checksums(pkg: repomd.Package) -> Dict[str, str]:
    """Return a one-entry mapping from checksum type to checksum value.

    The checksum is read from the package's ``common:checksum`` element; a
    type reported as ``sha`` is renamed to ``sha1``.
    """
    checksum_elt = pkg._element.find("common:checksum", namespaces=repomd._ns)
    algo = checksum_elt.get("type")
    return {"sha1" if algo == "sha" else algo: checksum_elt.text}
@dataclass
class FedoraListerState:
    """State of Fedora lister: the package versions recorded by a listing."""

    # Starts as an empty dict for each new state instance.
    package_versions: Dict[PkgName, Set[PkgVersion]] = field(default_factory=dict)
    """Dictionary mapping a package name to all the versions found during
    last listing"""
class FedoraLister(Lister[FedoraListerState, FedoraPageType]):
    """
    List source packages for given Fedora releases.

    The lister will create a snapshot for each package name from all its
    available versions.

    If a package snapshot is different from the last listing operation,
    it will be sent to the scheduler that will create a loading task
    to archive newly found source code.

    Args:
        scheduler: instance of SchedulerInterface
        instance: lister instance name, forwarded to the base lister
        url: fedora package archives mirror URL
        releases: list of fedora releases to process
    """

    LISTER_NAME = "fedora"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        instance: str = "fedora",
        url: str = "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
        releases: List[Release] = [34, 35, 36],
    ):
        super().__init__(
            scheduler=scheduler,
            url=url,
            instance=instance,
            credentials={},
        )

        # Keep a private copy: the default list object is shared between all
        # calls, so storing it directly would let a mutation of one lister's
        # releases leak into every lister created afterwards.
        self.releases = list(releases)

        # will hold all listed origins info
        self.listed_origins: Dict[FedoraOrigin, ListedOrigin] = {}
        # will hold updated origins since last listing
        self.origins_to_send: Set[FedoraOrigin] = set()
        # will contain the lister state after a call to run
        self.package_versions: Dict[PkgName, Set[PkgVersion]] = {}
        # set by page_request() once the final (release, edition) pair is fetched
        self.last_page = False

    def state_from_dict(self, d: Dict[str, Any]) -> FedoraListerState:
        """Rebuild the lister state from its serialized form (lists become sets)."""
        return FedoraListerState(package_versions={k: set(v) for k, v in d.items()})

    def state_to_dict(self, state: FedoraListerState) -> Dict[str, Any]:
        """Serialize the lister state, turning each set of versions into a list."""
        return {k: list(v) for k, v in state.package_versions.items()}

    def page_request(self, release: Release, edition: Edition) -> FedoraPageType:
        """Return parsed packages for a given fedora release and edition.

        Also flags the page as the last one when ``release`` is the final
        configured release and ``edition`` is its final edition.
        """
        # Source packages live under SRPMS/ before release 24, tree/ afterwards.
        source_dir = "source/SRPMS/" if release < 24 else "source/tree/"
        index_url = urljoin(self.url, f"{release}/{edition}/{source_dir}")

        repo = repomd.load(index_url)  # raises an error if repomd.xml is not found

        self.last_page = (
            release == self.releases[-1] and edition == get_editions(release)[-1]
        )

        logger.debug(
            "Fetched metadata from url: %s, found %d packages", index_url, len(repo)
        )
        # TODO: Extract more fields like "provides" and "requires" from *primary.xml
        # as extrinsic metadata using the pkg._element.findtext method
        return repo

    def get_pages(self) -> Iterator[FedoraPageType]:
        """Return an iterator on parsed fedora packages, one page per (release, edition) pair"""
        # NOTE(review): last_page is only set by a successful page_request();
        # if the final (release, edition) pair answers 404, it stays False and
        # get_origins_from_page() never yields the collected origins -- confirm
        # whether that case needs handling.
        for release in self.releases:
            for edition in get_editions(release):
                logger.debug("Listing fedora release %s edition %s", release, edition)
                # remembered so get_origins_from_page() can tag each package
                self.current_release = release
                self.current_edition = edition
                try:
                    yield self.page_request(release, edition)
                except HTTPError as http_error:
                    # a missing (release, edition) pair is skipped; any other
                    # HTTP error aborts the listing
                    if http_error.getcode() == 404:
                        logger.debug(
                            "No packages metadata found for fedora release %s edition %s",
                            release,
                            edition,
                        )
                        continue
                    raise

    def origin_url_for_package(self, package_name: PkgName) -> FedoraOrigin:
        """Return the origin url for the given package"""
        return f"https://src.fedoraproject.org/rpms/{package_name}"

    def get_origins_from_page(self, page: FedoraPageType) -> Iterator[ListedOrigin]:
        """Convert a page of fedora package sources into an iterator of ListedOrigin.

        Origins are accumulated across pages and only yielded once the last
        page has been processed.
        """
        assert self.lister_obj.id is not None

        origins_to_send = set()

        # iterate on each package's metadata
        for pkg_metadata in page:
            # extract package metadata
            package_name = pkg_metadata.name
            package_version = pkg_metadata.version
            package_build_time = get_last_modified(pkg_metadata)
            package_download_path = pkg_metadata.location

            # build origin url
            origin_url = self.origin_url_for_package(package_name)
            # create package version key as expected by the fedora (rpm) loader
            package_version_key = pkg_metadata.vr

            # this is the first time a package is listed
            if origin_url not in self.listed_origins:
                # create a ListedOrigin object for it that can be later
                # updated with new package versions info
                self.listed_origins[origin_url] = ListedOrigin(
                    lister_id=self.lister_obj.id,
                    url=origin_url,
                    visit_type="rpm",
                    extra_loader_arguments={"packages": {}},
                    last_update=package_build_time,
                )
                # init set that will contain all listed package versions
                self.package_versions[package_name] = set()

            # origin will be yielded at the end of that method
            origins_to_send.add(origin_url)

            # update package metadata in parameter that will be provided
            # to the rpm loader
            listed_origin = self.listed_origins[origin_url]
            listed_origin.extra_loader_arguments["packages"][package_version_key] = {
                "name": package_name,
                "version": package_version,
                "url": urljoin(page.baseurl, package_download_path),
                "release": self.current_release,
                "edition": self.current_edition,
                "buildTime": package_build_time.isoformat(),
                "checksums": get_checksums(pkg_metadata),
            }

            # keep the most recent build time as the origin's last update
            last_update = listed_origin.last_update
            if last_update is not None and package_build_time > last_update:
                listed_origin.last_update = package_build_time

            # add package version key to the set of found versions
            self.package_versions[package_name].add(package_version_key)

            # package has already been listed during a previous listing process
            if package_name in self.state.package_versions:
                new_versions = (
                    self.package_versions[package_name]
                    - self.state.package_versions[package_name]
                )
                # no new versions so far, no need to send the origin to the scheduler
                if not new_versions:
                    origins_to_send.remove(origin_url)

        logger.debug(
            "Found %s packages to update (new ones or packages with new versions).",
            len(origins_to_send),
        )
        logger.debug(
            "Current total number of listed packages is equal to %s.",
            len(self.listed_origins),
        )

        self.origins_to_send.update(origins_to_send)

        if self.last_page:
            # yield listed origins when all fedora releases and editions processed
            yield from (
                self.listed_origins[origin_url] for origin_url in self.origins_to_send
            )

    def finalize(self):
        """Store the listed package versions as state and flag whether anything was listed."""
        # set mapping between listed package names and versions as lister state
        self.state.package_versions = self.package_versions
        self.updated = bool(self.listed_origins)
# Copyright (C) 2022 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Dict
from celery import shared_task
from .lister import FedoraLister
@shared_task(name=__name__ + ".FullFedoraRelister")
def list_fedora_full(**lister_args) -> Dict[str, int]:
    """Run a full listing with a lister built from the configuration file.

    ``lister_args`` are forwarded to ``FedoraLister.from_configfile``; the
    statistics returned by the run are converted to a dict.
    """
    stats = FedoraLister.from_configfile(**lister_args).run()
    return stats.dict()
@shared_task(name=__name__ + ".ping")
def _ping() -> str:
    """Task registered under the ``.ping`` name that simply answers ``"OK"``."""
    return "OK"
File added
File added
File added
<?xml version="1.0" encoding="UTF-8"?>
<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
<revision>1499286311</revision>
<data type="primary">
<checksum type="sha256">4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3</checksum>
<open-checksum type="sha256">db616ad8e4219e23dfc05cd515e017cdc0d59144689ac606951fa42cbb06ae65</open-checksum>
<location href="repodata/4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3-primary.xml.gz"/>
<timestamp>1499286305</timestamp>
<size>5425131</size>
<open-size>30064034</open-size>
</data>
<data type="filelists">
<checksum type="sha256">17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c</checksum>
<open-checksum type="sha256">7caabd1205a72d26422756211dcd536336cef643f7f73eb15a470b02ff09a194</open-checksum>
<location href="repodata/17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c-filelists.xml.gz"/>
<timestamp>1499286305</timestamp>
<size>1650273</size>
<open-size>6419422</open-size>
</data>
<data type="other">
<checksum type="sha256">8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb</checksum>
<open-checksum type="sha256">786b8d4fa759f0ade3eaab1bde390d12c950dfe217eda1773400f3a3d461522b</open-checksum>
<location href="repodata/8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb-other.xml.gz"/>
<timestamp>1499286305</timestamp>
<size>4396102</size>
<open-size>33165783</open-size>
</data>
<data type="primary_db">
<checksum type="sha256">1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457</checksum>
<open-checksum type="sha256">dc8dbac072ac1412f0ecface57fa57c5ddcac14acc880fe9b467164be733e963</open-checksum>
<location href="repodata/1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457-primary.sqlite.bz2"/>
<timestamp>1499286309</timestamp>
<size>7071217</size>
<open-size>26177536</open-size>
<database_version>10</database_version>
</data>
<data type="filelists_db">
<checksum type="sha256">5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11</checksum>
<open-checksum type="sha256">b293d51dd4e6eb4128e40b6ce228c62b169b1d47be535e56f69b8ad622c4a6ca</open-checksum>
<location href="repodata/5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11-filelists.sqlite.bz2"/>
<timestamp>1499286307</timestamp>
<size>2227395</size>
<open-size>5529600</open-size>
<database_version>10</database_version>
</data>
<data type="other_db">
<checksum type="sha256">f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292</checksum>
<open-checksum type="sha256">3f5d4619dcabe945b773c1c98ea40b8ead53340291bd504ab3faabfc7b57bb99</open-checksum>
<location href="repodata/f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292-other.sqlite.bz2"/>
<timestamp>1499286311</timestamp>
<size>5264843</size>
<open-size>27930624</open-size>
<database_version>10</database_version>
</data>
</repomd>
<?xml version="1.0" encoding="UTF-8"?>
<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
<revision>1651698851</revision>
<data type="primary">
<checksum type="sha256">42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c</checksum>
<open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
<location href="repodata/42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c-primary.xml.gz"/>
<timestamp>1651698827</timestamp>
<size>7144060</size>
<open-size>45898728</open-size>
</data>
<data type="filelists">
<checksum type="sha256">fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92</checksum>
<open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
<location href="repodata/fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92-filelists.xml.gz"/>
<timestamp>1651698827</timestamp>
<size>1934835</size>
<open-size>7458268</open-size>
</data>
<data type="other">
<checksum type="sha256">461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09</checksum>
<open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
<location href="repodata/461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09-other.xml.gz"/>
<timestamp>1651698827</timestamp>
<size>3779969</size>
<open-size>33166564</open-size>
</data>
<data type="primary_db">
<checksum type="sha256">ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74</checksum>
<open-checksum type="sha256">c752f5132f2cc5f4f137dade787154316f9503ae816212b8fabf5733cc2d344d</open-checksum>
<location href="repodata/ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74-primary.sqlite.xz"/>
<timestamp>1651698851</timestamp>
<size>9058624</size>
<open-size>41562112</open-size>
<database_version>10</database_version>
</data>
<data type="filelists_db">
<checksum type="sha256">1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8</checksum>
<open-checksum type="sha256">e9b5c17e6004a78d20146aa54fa5ac93a01f4f2a95117588d649e92cfc008473</open-checksum>
<location href="repodata/1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8-filelists.sqlite.xz"/>
<timestamp>1651698834</timestamp>
<size>1809496</size>
<open-size>6471680</open-size>
<database_version>10</database_version>
</data>
<data type="other_db">
<checksum type="sha256">850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f</checksum>
<open-checksum type="sha256">d13c6da8f7ad2c9060fd5b811b86facc9e926ec9273c0e135c4fe1110f784cdc</open-checksum>
<location href="repodata/850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f-other.sqlite.xz"/>
<timestamp>1651698838</timestamp>
<size>4285108</size>
<open-size>27897856</open-size>
<database_version>10</database_version>
</data>
<data type="primary_zck">
<checksum type="sha256">fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b</checksum>
<open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
<header-checksum type="sha256">2074f3da25ad0d45cf2776ad35dd22a6c63fafff319143c2f7dfefa98b99d651</header-checksum>
<location href="repodata/fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b-primary.xml.zck"/>
<timestamp>1651698828</timestamp>
<size>6030441</size>
<open-size>45898728</open-size>
<header-size>231</header-size>
</data>
<data type="filelists_zck">
<checksum type="sha256">6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9</checksum>
<open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
<header-checksum type="sha256">55fc5e75acd903f01cf18328fec9c6f995bd8f80c5b085aa3e0fe116bb89e891</header-checksum>
<location href="repodata/6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9-filelists.xml.zck"/>
<timestamp>1651698829</timestamp>
<size>1735208</size>
<open-size>7458268</open-size>
<header-size>136</header-size>
</data>
<data type="other_zck">
<checksum type="sha256">c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5</checksum>
<open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
<header-checksum type="sha256">93624d227c24ff4eb2332fcb038e7157e08ed051b654820def75c5511a1ce191</header-checksum>
<location href="repodata/c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5-other.xml.zck"/>
<timestamp>1651698829</timestamp>
<size>3019451</size>
<open-size>33166564</open-size>
<header-size>206</header-size>
</data>
</repomd>
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from io import StringIO
from pathlib import Path
from typing import List
from unittest.mock import MagicMock
from urllib.error import HTTPError
import pytest
from swh.lister.fedora.lister import FedoraLister, Release, get_editions
from swh.scheduler.interface import SchedulerInterface
def mock_repomd(datadir, mocker, use_altered_fedora36=False):
    """Patch the urlopen used by repomd so the next lister run reads local fixtures.

    Successive ``read()`` calls return, in order, the repomd and primary
    files of release 26 and then of release 36. With
    ``use_altered_fedora36`` the altered primary file of release 36 is
    served instead of the regular one.
    """
    primary36 = (
        "primary36-altered.xml.gz" if use_altered_fedora36 else "primary36.xml.gz"
    )
    filenames = ["repomd26.xml", "primary26.xml.gz", "repomd36.xml", primary36]
    fixtures_dir = Path(datadir, "archives.fedoraproject.org")

    response = MagicMock()
    response.read.side_effect = [
        (fixtures_dir / filename).read_bytes() for filename in filenames
    ]
    # the mock is its own context manager value
    response.__enter__.return_value = response

    urlopen = mocker.patch("repomd.urllib.request.urlopen")
    urlopen.return_value = response
def rpm_url(release, path):
    """Return the URL of a source rpm in the ``Everything`` edition of ``release``."""
    mirror_root = (
        "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/"
    )
    package_path = f"{release}/Everything/source/tree/Packages/{path}"
    return mirror_root + package_path
@pytest.fixture
def pkg_versions():
    """Expected packages per origin URL.

    Maps each origin URL to the ``packages`` loader argument expected for it,
    keyed by package version-release string.
    """
    return {
        "https://src.fedoraproject.org/rpms/0install": {
            "2.11-4.fc26": {
                "name": "0install",
                "version": "2.11",
                "release": 26,
                "edition": "Everything",
                "buildTime": "2017-02-10T04:59:31+00:00",
                "url": rpm_url(26, "0/0install-2.11-4.fc26.src.rpm"),
                "checksums": {
                    # note: we intentionally altered the original
                    # primary26.xml file to test sha1 usage
                    "sha1": "a6fdef5d1026dea208eeeba148f55ac2f545989b",
                },
            }
        },
        "https://src.fedoraproject.org/rpms/0xFFFF": {
            "0.3.9-15.fc26": {
                "name": "0xFFFF",
                "version": "0.3.9",
                "release": 26,
                "edition": "Everything",
                "buildTime": "2017-02-10T05:01:53+00:00",
                "url": rpm_url(26, "0/0xFFFF-0.3.9-15.fc26.src.rpm"),
                "checksums": {
                    "sha256": "96f9c163c0402d2b30e5343c8397a6d50e146c85a446804396b119ef9698231f"
                },
            },
            "0.9-4.fc36": {
                "name": "0xFFFF",
                "version": "0.9",
                "release": 36,
                "edition": "Everything",
                "buildTime": "2022-01-19T19:13:53+00:00",
                "url": rpm_url(36, "0/0xFFFF-0.9-4.fc36.src.rpm"),
                "checksums": {
                    "sha256": "45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd"
                },
            },
        },
        "https://src.fedoraproject.org/rpms/2ping": {
            "4.5.1-2.fc36": {
                "name": "2ping",
                "version": "4.5.1",
                "release": 36,
                "edition": "Everything",
                "buildTime": "2022-01-19T19:12:21+00:00",
                "url": rpm_url(36, "2/2ping-4.5.1-2.fc36.src.rpm"),
                "checksums": {
                    "sha256": "2ce028d944ebea1cab8c6203c9fed882792478b42fc34682b886a9db16e9de28"
                },
            }
        },
    }
def run_lister(
    swh_scheduler: SchedulerInterface,
    releases: List[Release],
    pkg_versions: dict,
    origin_count: int,
    updated: bool = True,
):
    """Run a lister over ``releases`` and check its stats, origins and state.

    ``pkg_versions`` maps origin URLs to their expected packages,
    ``origin_count`` is the expected number of origins sent by this run and
    ``updated`` the expected value of the lister's ``updated`` flag.
    """
    lister = FedoraLister(scheduler=swh_scheduler, releases=releases)
    stats = lister.run()

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    stored_state = lister.get_state_from_scheduler()

    # the state is keyed by package name, i.e. the last segment of the origin URL
    expected_state = {
        url.split("/")[-1]: set(versions) for url, versions in pkg_versions.items()
    }
    listed_packages = {
        origin.url: origin.extra_loader_arguments["packages"] for origin in listed
    }

    # One edition from each release (we mocked get_editions)
    expected_pages = len(releases) if updated else 0
    assert stats.pages == expected_pages
    assert stats.origins == origin_count
    assert listed_packages == pkg_versions
    assert stored_state.package_versions == expected_state
    assert lister.updated == updated
def test_get_editions():
    """Check the editions returned for one release of each supported range."""
    expected_editions = {
        18: ["Everything", "Fedora"],
        26: ["Everything", "Server", "Workstation"],
        34: ["Everything", "Server", "Workstation", "Modular"],
    }
    for release, editions in expected_editions.items():
        assert get_editions(release) == editions
@pytest.mark.parametrize("status_code", [400, 404, 500])
def test_fedora_lister_http_error(
    swh_scheduler: SchedulerInterface, mocker: MagicMock, status_code: int
):
    """
    Every metadata fetch fails with ``status_code``: a 404 yields an empty
    listing while any other status propagates as an HTTPError.
    """
    releases = [18]
    not_found = status_code == 404
    reason = "Not Found" if not_found else "Internal server error"

    def raise_http_error(url):
        raise HTTPError(
            url, status_code, reason, {"content-type": "text/html"}, StringIO()
        )

    mocker.patch("repomd.urllib.request.urlopen").side_effect = raise_http_error

    expected_pkgs: dict = {}

    if not not_found:
        with pytest.raises(HTTPError):
            run_lister(
                swh_scheduler, releases, expected_pkgs, origin_count=0, updated=False
            )
        return

    run_lister(swh_scheduler, releases, expected_pkgs, origin_count=0, updated=False)
def test_full_lister_fedora(
    swh_scheduler: SchedulerInterface,
    mocker: MagicMock,
    datadir: Path,
    pkg_versions: dict,
):
    """
    Full listing of releases 26 and 36, restricted to the Everything edition.
    """
    # restrict every release to a single edition
    mocker.patch(
        "swh.lister.fedora.lister.get_editions", return_value=["Everything"]
    )
    mock_repomd(datadir, mocker)

    run_lister(swh_scheduler, [26, 36], pkg_versions, origin_count=3)
def test_incremental_lister(
    swh_scheduler: SchedulerInterface,
    mocker: MagicMock,
    datadir: Path,
    pkg_versions: dict,
):
    """
    Three successive listings: initial, unchanged, then one updated package.
    """
    releases = [26, 36]
    # restrict every release to a single edition
    mocker.patch(
        "swh.lister.fedora.lister.get_editions", return_value=["Everything"]
    )

    # First run
    mock_repomd(datadir, mocker)
    run_lister(swh_scheduler, releases, pkg_versions, origin_count=3)

    # Second run (no updates)
    mock_repomd(datadir, mocker)
    run_lister(swh_scheduler, releases, pkg_versions, origin_count=0)

    # Use an altered version of primary36.xml in which we updated the version
    # of package 0xFFFF to 0.10:
    mock_repomd(datadir, mocker, use_altered_fedora36=True)

    # Add new version to the set of expected pkg versions:
    expected_0xffff = pkg_versions["https://src.fedoraproject.org/rpms/0xFFFF"]
    expected_0xffff["0.10-4.fc36"] = {
        "name": "0xFFFF",
        "version": "0.10",
        "release": 36,
        "edition": "Everything",
        "buildTime": "2022-01-19T19:13:53+00:00",
        "url": rpm_url(36, "0/0xFFFF-0.10-4.fc36.src.rpm"),
        "checksums": {
            "sha256": "45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd"
        },
    }

    # Third run (0xFFFF in fedora36 editions got updated and it needs to be listed)
    run_lister(swh_scheduler, releases, pkg_versions, origin_count=1)
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from unittest.mock import patch
from swh.lister.pattern import ListerStats
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """The ping task can be sent, completes successfully and answers "OK"."""
    task = swh_scheduler_celery_app.send_task("swh.lister.fedora.tasks.ping")
    assert task

    task.wait()
    assert task.successful()
    assert task.result == "OK"
@patch("swh.lister.fedora.tasks.FedoraLister")
def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """The full listing task builds the lister from its kwargs and runs it once."""
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    task_kwargs = {
        "url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
    }
    task = swh_scheduler_celery_app.send_task(
        "swh.lister.fedora.tasks.FullFedoraRelister",
        kwargs=task_kwargs,
    )
    assert task

    task.wait()
    assert task.successful()

    lister.from_configfile.assert_called_once_with(**task_kwargs)
    lister.run.assert_called_once_with()
@patch("swh.lister.fedora.tasks.FedoraLister")
def test_full_listing_params(
    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
    """Every task keyword argument is forwarded unchanged to ``from_configfile``."""
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    task_kwargs = {
        "url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
        "instance": "archives.fedoraproject.org",
        "releases": ["36"],
    }
    task = swh_scheduler_celery_app.send_task(
        "swh.lister.fedora.tasks.FullFedoraRelister",
        kwargs=task_kwargs,
    )
    assert task

    task.wait()
    assert task.successful()

    lister.from_configfile.assert_called_once_with(**task_kwargs)
    lister.run.assert_called_once_with()
......@@ -39,6 +39,9 @@ lister_args = {
"url": "https://guix.gnu.org/sources.json",
"origin_upstream": "https://git.savannah.gnu.org/cgit/guix.git/",
},
"fedora": {
"url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases//",
},
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment