def register():
    """Return the plugin description of the Pagure lister.

    The lister class is imported inside the function, so importing this
    package alone does not load the lister module.

    Returns:
        A mapping with the lister class under ``"lister"`` and the list of
        celery task modules of this package under ``"task_modules"``.
    """
    from .lister import PagureLister

    tasks_module = f"{__name__}.tasks"
    return {
        "lister": PagureLister,
        "task_modules": [tasks_module],
    }
class PagureLister(StatelessLister[ProjectsPage]):
    """List git origins hosted on a Pagure forge.

    Projects are fetched page by page from the ``/api/0/projects`` endpoint
    of the forge, and each project is reported as a ``git`` origin.
    """

    LISTER_NAME = "pagure"

    API_PROJECTS_ENDPOINT = "/api/0/projects"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: Optional[str] = None,
        instance: Optional[str] = None,
        credentials: CredentialsType = None,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
        per_page: int = 100,
    ):
        """Set up the lister and compute the projects endpoint URL.

        Args:
            scheduler: scheduler instance, forwarded to the base lister.
            url: base URL of the Pagure forge; trailing slashes are removed
                before it is handed to the base lister.
            instance: forwarded unchanged to the base lister.
            credentials: forwarded unchanged to the base lister.
            max_origins_per_page: forwarded unchanged to the base lister.
            max_pages: forwarded unchanged to the base lister.
            enable_origins: forwarded unchanged to the base lister.
            per_page: number of projects requested per API page.
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            url=url.rstrip("/") if url else None,
            instance=instance,
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
        )

        self.per_page = per_page
        self.session.headers.update({"Accept": "application/json"})
        # NOTE(review): relies on the base lister having set ``self.url`` to
        # the forge base URL (from ``url`` or ``instance``) -- confirm there.
        # From here on ``self.url`` designates the projects API endpoint.
        self.url = f"{self.url}{self.API_PROJECTS_ENDPOINT}"

    def get_pages(self) -> Iterator[ProjectsPage]:
        """Yield the ``projects`` list of every page of the projects API.

        Iteration follows the ``pagination.next`` link of each response and
        stops when that link is empty.
        """
        url_projects: Optional[str] = self.url
        while url_projects:
            # The page size is only added when the URL does not already
            # mention it, so a ``next`` link carrying ``per_page`` is
            # requested as is.
            params = (
                {"per_page": self.per_page} if "per_page" not in url_projects else None
            )
            response = self.http_request(url_projects, params=params).json()
            yield response["projects"]
            url_projects = response["pagination"]["next"]

    def get_origins_from_page(self, projects: ProjectsPage) -> Iterator[ListedOrigin]:
        """Convert one page of Pagure projects into listed git origins.

        Args:
            projects: project descriptions as returned by the API; each one
                must provide ``full_url`` and ``date_modified``.

        Yields:
            One ``ListedOrigin`` per project, whose ``last_update`` is the
            timezone-aware UTC date of the project's last modification.
        """
        assert self.lister_obj.id is not None

        for project in projects:
            # ``date_modified`` is a POSIX timestamp (a string of seconds
            # since the epoch). Converting it directly with ``tz`` set
            # yields the right instant whatever the local timezone of the
            # host is, unlike a naive local conversion relabelled as UTC.
            last_update = datetime.fromtimestamp(
                int(project["date_modified"]), tz=timezone.utc
            )
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                url=project["full_url"],
                visit_type="git",
                last_update=last_update,
            )
@shared_task(name=f"{__name__}.PagureListerTask")
def list_pagure(**lister_args) -> Dict[str, int]:
    """Run a Pagure listing and return its statistics as a dict.

    Args:
        lister_args: keyword arguments forwarded to
            ``PagureLister.from_configfile``.
    """
    stats = PagureLister.from_configfile(**lister_args).run()
    return stats.dict()


@shared_task(name=f"{__name__}.ping")
def _ping() -> str:
    """Trivial task that always answers ``"OK"``."""
    return "OK"
+ "fullname": "Jason Giddings", + "name": "jg-dev", + "url_path": "user/jg-dev" + } + }, + { + "access_groups": { + "admin": [], + "collaborator": [], + "commit": [], + "ticket": [] + }, + "access_users": { + "admin": [], + "collaborator": [], + "commit": [], + "owner": [ + "ankursinha" + ], + "ticket": [] + }, + "close_status": [], + "custom_keys": [], + "date_created": "1568047513", + "date_modified": "1568047513", + "description": "NeuroFedora presentation for the OSB workshop", + "full_url": "https://pagure.io/neuro-sig/20190909-OSB-workshop-presentation", + "fullname": "neuro-sig/20190909-OSB-workshop-presentation", + "id": 6715, + "milestones": {}, + "name": "20190909-OSB-workshop-presentation", + "namespace": "neuro-sig", + "parent": null, + "priorities": {}, + "tags": [], + "url_path": "neuro-sig/20190909-OSB-workshop-presentation", + "user": { + "full_url": "https://pagure.io/user/ankursinha", + "fullname": "Ankur Sinha", + "name": "ankursinha", + "url_path": "user/ankursinha" + } + } + ], + "total_projects": 9842 +} \ No newline at end of file diff --git a/swh/lister/pagure/tests/data/https_pagure.io/api_0_projects,per_page=2,page=2 b/swh/lister/pagure/tests/data/https_pagure.io/api_0_projects,per_page=2,page=2 new file mode 100644 index 0000000000000000000000000000000000000000..b236c7e4f2e899abfd7637c58c698e1db8c3fd90 --- /dev/null +++ b/swh/lister/pagure/tests/data/https_pagure.io/api_0_projects,per_page=2,page=2 @@ -0,0 +1,103 @@ +{ + "args": { + "fork": null, + "namespace": null, + "owner": null, + "page": 2, + "pattern": null, + "per_page": 2, + "short": false, + "tags": [], + "username": null + }, + "pagination": { + "first": "https://pagure.io/api/0/projects?per_page=2&page=1", + "last": "https://pagure.io/api/0/projects?per_page=2&page=2", + "next": null, + "page": 2, + "pages": 4921, + "per_page": 2, + "prev": "https://pagure.io/api/0/projects?per_page=2&page=1" + }, + "projects": [ + { + "access_groups": { + "admin": [], + "collaborator": [], + 
"commit": [], + "ticket": [] + }, + "access_users": { + "admin": [], + "collaborator": [], + "commit": [], + "owner": [ + "ankursinha" + ], + "ticket": [] + }, + "close_status": [], + "custom_keys": [], + "date_created": "1568047513", + "date_modified": "1568047513", + "description": "NeuroFedora presentation for the OSB workshop", + "full_url": "https://pagure.io/neuro-sig/20190909-OSB-workshop-presentation", + "fullname": "neuro-sig/20190909-OSB-workshop-presentation", + "id": 6715, + "milestones": {}, + "name": "20190909-OSB-workshop-presentation", + "namespace": "neuro-sig", + "parent": null, + "priorities": {}, + "tags": [], + "url_path": "neuro-sig/20190909-OSB-workshop-presentation", + "user": { + "full_url": "https://pagure.io/user/ankursinha", + "fullname": "Ankur Sinha", + "name": "ankursinha", + "url_path": "user/ankursinha" + } + }, + { + "access_groups": { + "admin": [ + "neuro-sig" + ], + "collaborator": [], + "commit": [], + "ticket": [] + }, + "access_users": { + "admin": [], + "collaborator": [], + "commit": [], + "owner": [ + "ankursinha" + ], + "ticket": [] + }, + "close_status": [], + "custom_keys": [], + "date_created": "1564047918", + "date_modified": "1565348955", + "description": "Presentation slides for NeuroFedora talk at Flock", + "full_url": "https://pagure.io/neuro-sig/2019-flock-neurofedora", + "fullname": "neuro-sig/2019-flock-neurofedora", + "id": 6523, + "milestones": {}, + "name": "2019-flock-neurofedora", + "namespace": "neuro-sig", + "parent": null, + "priorities": {}, + "tags": [], + "url_path": "neuro-sig/2019-flock-neurofedora", + "user": { + "full_url": "https://pagure.io/user/ankursinha", + "fullname": "Ankur Sinha", + "name": "ankursinha", + "url_path": "user/ankursinha" + } + } + ], + "total_projects": 9842 +} \ No newline at end of file diff --git a/swh/lister/pagure/tests/test_lister.py b/swh/lister/pagure/tests/test_lister.py new file mode 100644 index 
expected_origins = {
    "https://pagure.io/10291-testing",
    "https://pagure.io/neuro-sig/20190909-OSB-workshop-presentation",
    "https://pagure.io/neuro-sig/2019-flock-neurofedora",
}


@pytest.mark.parametrize(
    "params", [{"url": "https://pagure.io"}, {"instance": "pagure.io"}]
)
def test_pagure_lister(requests_mock_datadir, swh_scheduler, params):
    """List the two fixture pages, building the lister from a URL or an instance."""
    pagure_lister = PagureLister(**params, scheduler=swh_scheduler, per_page=2)
    stats = pagure_lister.run()

    # Two pages of two projects each, one project appearing on both pages.
    assert stats.pages == 2
    assert stats.origins == 3

    listed = swh_scheduler.get_listed_origins(pagure_lister.lister_obj.id).results

    assert len(listed) == len(expected_origins)

    for listed_origin in listed:
        assert listed_origin.visit_type == "git"
        assert listed_origin.url in expected_origins
        assert listed_origin.last_update is not None
def test_pagure_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """The ping task of the Pagure lister answers ``OK``."""
    async_result = swh_scheduler_celery_app.send_task("swh.lister.pagure.tasks.ping")
    assert async_result
    async_result.wait()
    assert async_result.successful()
    assert async_result.result == "OK"


def test_pagure_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """The listing task builds the lister from the config file and runs it."""
    # Swap the PagureLister used by the task module for a mock whose
    # ``from_configfile`` returns the mock itself.
    mocked_lister = mocker.patch("swh.lister.pagure.tasks.PagureLister")
    expected_stats = ListerStats(pages=42, origins=42)
    mocked_lister.from_configfile.return_value = mocked_lister
    mocked_lister.run.return_value = expected_stats

    async_result = swh_scheduler_celery_app.send_task(
        "swh.lister.pagure.tasks.PagureListerTask"
    )
    assert async_result
    async_result.wait()
    assert async_result.successful()
    assert async_result.result == expected_stats.dict()

    mocked_lister.from_configfile.assert_called_once_with()
    mocked_lister.run.assert_called_once_with()