Skip to content
Snippets Groups Projects
Commit d34a6232 authored by Kumar Shivendu's avatar Kumar Shivendu
Browse files

gogs: Introduce Gogs lister

parent 1bf11aa2
No related branches found
No related tags found
1 merge request: !286 "gogs: Introduce Gogs lister"
......@@ -7,3 +7,4 @@ Hezekiah Maina
Boris Baldassari
Léo Andrès
Franck Bret
Kumar Shivendu
......@@ -24,6 +24,7 @@ following Python modules:
- `swh.lister.phabricator`
- `swh.lister.pypi`
- `swh.lister.tuleap`
- `swh.lister.gogs`
Dependencies
------------
......
......@@ -74,6 +74,7 @@ setup(
lister.sourceforge=swh.lister.sourceforge:register
lister.tuleap=swh.lister.tuleap:register
lister.maven=swh.lister.maven:register
lister.gogs=swh.lister.gogs:register
""",
classifiers=[
"Programming Language :: Python :: 3",
......
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def register():
    """Describe the Gogs lister plugin.

    Referenced by the ``lister.gogs`` entry point. The lister class is
    imported lazily, inside the function, rather than at module import time.

    Returns:
        A dict with the lister class under ``"lister"`` and the dotted path
        of this package's celery tasks module under ``"task_modules"``.
    """
    from .lister import GogsLister

    tasks_module = f"{__name__}.tasks"
    return dict(lister=GogsLister, task_modules=[tasks_module])
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import random
from typing import Any, Dict, Iterator, List, Optional
from urllib.parse import urljoin
import iso8601
import requests
from tenacity.before_sleep import before_sleep_log
from swh.lister.utils import throttling_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from .. import USER_AGENT
from ..pattern import CredentialsType, StatelessLister
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Type alias for one page of results yielded by `GogsLister.get_pages`:
# a list of repository dicts.
GogsListerPage = List[Dict[str, Any]]
class GogsLister(StatelessLister[GogsListerPage]):
    """List origins from a Gogs instance.

    Gogs API documentation: https://github.com/gogs/docs-api

    The API is protected behind authentication so credentials/API tokens
    are mandatory. It supports pagination and provides the next page URL
    through the 'next' value of the 'Link' header. The default value for
    page size ('limit') is 10 but the maximum allowed value is 50.
    """

    LISTER_NAME = "gogs"
    VISIT_TYPE = "git"

    REPO_LIST_PATH = "repos/search"

    # Page size used when the caller passes no explicit value.
    DEFAULT_PAGE_SIZE = 50

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: str,
        instance: Optional[str] = None,
        api_token: Optional[str] = None,
        page_size: Optional[int] = 50,
        credentials: CredentialsType = None,
    ):
        """Set up the query parameters and an authenticated HTTP session.

        Args:
            scheduler: scheduler interface, forwarded to the parent class
            url: base URL of the Gogs API (e.g. ``https://host/api/v1/``)
            instance: instance name, forwarded to the parent class
            api_token: API token; when ``None``, the password of one of the
                configured credentials (picked at random) is used instead
            page_size: value sent as the ``limit`` query parameter; ``None``
                falls back to ``DEFAULT_PAGE_SIZE``
            credentials: credentials, forwarded to the parent class

        Raises:
            ValueError: if neither an API token nor credentials are available
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            url=url,
            instance=instance,
        )

        # The celery task forwards ``page_size=None`` when the caller gives no
        # value; without this fallback the "limit" parameter would be None.
        if page_size is None:
            page_size = self.DEFAULT_PAGE_SIZE

        self.query_params = {
            "limit": page_size,
            "page": 1,
        }

        self.api_token = api_token
        if self.api_token is None:
            # Truthiness check so that both an empty list and a missing
            # credentials value end up in the explicit ValueError below.
            if not self.credentials:
                raise ValueError("No credentials or API token provided")
            cred = random.choice(self.credentials)
            username = cred.get("username")
            self.api_token = cred["password"]
            logger.warning(
                "Using authentication credentials from user %s", username or "???"
            )

        # NOTE(review): not read anywhere in this class; kept so that existing
        # attribute access keeps working. Confirm whether it can be removed.
        self.max_page_limit = 2

        self.session = requests.Session()
        self.session.headers.update(
            {
                "Accept": "application/json",
                "User-Agent": USER_AGENT,
                "Authorization": f"token {self.api_token}",
            }
        )

    @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
    def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
        """GET ``url`` with ``params`` through the authenticated session.

        Any status code other than 200 is logged as a warning; HTTP error
        statuses are then raised by ``raise_for_status()``. The method is
        wrapped by ``throttling_retry`` (retry policy defined in
        ``swh.lister.utils``), which logs at WARNING level before sleeping.

        Raises:
            requests.HTTPError: on a 4xx/5xx response
        """
        logger.debug("Fetching URL %s with params %s", url, params)

        response = self.session.get(url, params=params)

        if response.status_code != 200:
            logger.warning(
                "Unexpected HTTP status code %s on %s: %s",
                response.status_code,
                response.url,
                response.content,
            )
        response.raise_for_status()

        return response

    @classmethod
    def results_simplified(cls, body: Dict[str, GogsListerPage]) -> GogsListerPage:
        """Keep only the fields of interest for each repository in ``body["data"]``.

        Returns:
            One dict per repository, restricted to the ``id``, ``clone_url``
            and ``updated_at`` keys.
        """
        fields_filter = ["id", "clone_url", "updated_at"]
        return [{k: r[k] for k in fields_filter} for r in body["data"]]

    def get_pages(self) -> Iterator[GogsListerPage]:
        """Yield simplified result pages, following the 'next' Link header."""
        # urljoin() replaces the last path segment of a base URL that does not
        # end with "/", so normalize the base before joining the relative path.
        base_url = self.url if self.url.endswith("/") else f"{self.url}/"
        url = urljoin(base_url, self.REPO_LIST_PATH)
        response = self.page_request(url, self.query_params)

        while True:
            page_results = self.results_simplified(response.json())
            yield page_results

            assert len(response.links) > 0, "API changed: no Link header found"
            if "next" not in response.links:
                break
            # The 'next' URL is requested as-is, without extra query params.
            url = response.links["next"]["url"]
            response = self.page_request(url, {})

    def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]:
        """Convert a page of Gogs repositories into a list of ListedOrigins"""
        assert self.lister_obj.id is not None

        for repo in page:
            last_update = iso8601.parse_date(repo["updated_at"])

            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=repo["clone_url"],
                last_update=last_update,
            )
# Copyright (C) 2022 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Dict, Optional
from celery import shared_task
from .lister import GogsLister
@shared_task(name=f"{__name__}.FullGogsRelister")
def list_gogs_full(
    url: str,
    instance: Optional[str] = None,
    api_token: Optional[str] = None,
    page_size: Optional[int] = None,
) -> Dict[str, int]:
    """Full update of a Gogs instance.

    Every argument is forwarded unchanged, by keyword, to
    ``GogsLister.from_configfile``; the lister is then run and the ``dict()``
    form of its run result is returned.
    """
    lister_kwargs = {
        "url": url,
        "instance": instance,
        "api_token": api_token,
        "page_size": page_size,
    }
    stats = GogsLister.from_configfile(**lister_kwargs).run()
    return stats.dict()
@shared_task(name=f"{__name__}.ping")
def _ping() -> str:
    """Trivial task that always answers ``"OK"``."""
    reply = "OK"
    return reply
{
"data": [
{
"id": 190,
"owner": {
"id": 338,
"username": "carwyn",
"login": "carwyn",
"full_name": "",
"email": "carwyn@carwyn.com",
"avatar_url": "https://secure.gravatar.com/avatar/65a98c538bcc360e9e9739d2af7908b0?d=identicon"
},
"name": "test-repo",
"full_name": "carwyn/test-repo",
"description": "An example.",
"private": false,
"fork": false,
"parent": null,
"empty": false,
"mirror": false,
"size": 1024,
"html_url": "https://try.gogs.io/carwyn/test-repo",
"ssh_url": "git@try.gogs.io:carwyn/test-repo.git",
"clone_url": "https://try.gogs.io/carwyn/test-repo.git",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"open_issues_count": 0,
"default_branch": "master",
"created_at": "2015-02-17T21:11:54Z",
"updated_at": "2022-03-26T07:28:38Z"
},
{
"id": 258,
"owner": {
"id": 462,
"username": "juquinha",
"login": "juquinha",
"full_name": "",
"email": "juquinha123@mailinator.com",
"avatar_url": "https://secure.gravatar.com/avatar/40cdc8c32069ac441ff7f5c9bfe0f9ef?d=identicon"
},
"name": "zicarepo",
"full_name": "juquinha/zicarepo",
"description": "Foo test.",
"private": false,
"fork": false,
"parent": null,
"empty": false,
"mirror": false,
"size": 8192,
"html_url": "https://try.gogs.io/juquinha/zicarepo",
"ssh_url": "git@try.gogs.io:juquinha/zicarepo.git",
"clone_url": "https://try.gogs.io/juquinha/zicarepo.git",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"open_issues_count": 1,
"default_branch": "master",
"created_at": "2015-02-24T12:13:57Z",
"updated_at": "2022-03-26T07:28:38Z"
},
{
"id": 334,
"owner": {
"id": 582,
"username": "ivilata",
"login": "ivilata",
"full_name": "",
"email": "ivan@pangea.org",
"avatar_url": "https://secure.gravatar.com/avatar/ed21e55837a9080c57181f624aefa905?d=identicon"
},
"name": "footest",
"full_name": "ivilata/footest",
"description": "Dummy repo for testing issue handling mainly.",
"private": false,
"fork": false,
"parent": null,
"empty": false,
"mirror": false,
"size": 3072,
"html_url": "https://try.gogs.io/ivilata/footest",
"ssh_url": "git@try.gogs.io:ivilata/footest.git",
"clone_url": "https://try.gogs.io/ivilata/footest.git",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"open_issues_count": 1,
"default_branch": "master",
"created_at": "2015-03-03T17:03:45Z",
"updated_at": "2022-03-26T07:28:38Z"
}
],
"ok": true
}
{
"data": [
{
"id": 337,
"owner": {
"id": 585,
"username": "zork",
"login": "zork",
"full_name": "",
"email": "f905334@trbvm.com",
"avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon"
},
"name": "zork-repo",
"full_name": "zork/zork-repo",
"description": "This is a test thing.",
"private": false,
"fork": false,
"parent": null,
"empty": false,
"mirror": false,
"size": 13312,
"html_url": "https://try.gogs.io/zork/zork-repo",
"ssh_url": "git@try.gogs.io:zork/zork-repo.git",
"clone_url": "https://try.gogs.io/zork/zork-repo.git",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"open_issues_count": 0,
"default_branch": "master",
"created_at": "2015-03-03T22:31:53Z",
"updated_at": "2022-03-26T07:28:38Z"
},
{
"id": 338,
"owner": {
"id": 585,
"username": "zork",
"login": "zork",
"full_name": "",
"email": "f905334@trbvm.com",
"avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon"
},
"name": "supernova",
"full_name": "zork/supernova",
"description": "This is a description. Blah blah blah.",
"private": false,
"fork": false,
"parent": null,
"empty": false,
"mirror": false,
"size": 1471488,
"html_url": "https://try.gogs.io/zork/supernova",
"ssh_url": "git@try.gogs.io:zork/supernova.git",
"clone_url": "https://try.gogs.io/zork/supernova.git",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"open_issues_count": 0,
"default_branch": "master",
"created_at": "2015-03-03T22:44:20Z",
"updated_at": "2022-03-26T07:28:38Z"
},
{
"id": 339,
"owner": {
"id": 585,
"username": "zork",
"login": "zork",
"full_name": "",
"email": "f905334@trbvm.com",
"avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon"
},
"name": "digits",
"full_name": "zork/digits",
"description": "Distantly related to the game Mastermind, you are given clues to help determine a random number combination. The object of the game is to guess the solution in as few tries as possible.",
"private": false,
"fork": false,
"parent": null,
"empty": false,
"mirror": false,
"size": 18432,
"html_url": "https://try.gogs.io/zork/digits",
"ssh_url": "git@try.gogs.io:zork/digits.git",
"clone_url": "https://try.gogs.io/zork/digits.git",
"website": "",
"stars_count": 0,
"forks_count": 1,
"watchers_count": 1,
"open_issues_count": 0,
"default_branch": "master",
"created_at": "2015-03-03T22:47:56Z",
"updated_at": "2022-03-26T07:28:38Z"
}
],
"ok": true
}
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
from pathlib import Path
from typing import List
from unittest.mock import Mock
import pytest
from requests import HTTPError
from swh.lister.gogs.lister import GogsLister
from swh.scheduler.model import ListedOrigin
# Base URL of the Gogs API used throughout these tests.
TRY_GOGS_URL = "https://try.gogs.io/api/v1/"


def try_gogs_page(n: int):
    """Return the repository-search URL for page ``n`` with a page size of 3."""
    query = f"page={n}&limit=3"
    return f"{TRY_GOGS_URL}repos/search?{query}"
@pytest.fixture
def trygogs_p1(datadir):
    """First result page: raw text, Link headers pointing at page 2,
    simplified page result and the origin URLs it contains."""
    fixture_path = Path(datadir) / "https_try.gogs.io" / "repos_page1"
    text = fixture_path.read_text()

    link_template = '<{p2}>; rel="next",<{p2}>; rel="last"'
    headers = {"Link": link_template.format(p2=try_gogs_page(2))}

    page_result = GogsLister.results_simplified(json.loads(text))
    origin_urls = [repo["clone_url"] for repo in page_result]
    return text, headers, page_result, origin_urls
@pytest.fixture
def trygogs_p2(datadir):
    """Second (last) result page: raw text, Link headers pointing back at
    page 1, simplified page result and the origin URLs it contains."""
    fixture_path = Path(datadir) / "https_try.gogs.io" / "repos_page2"
    text = fixture_path.read_text()

    link_template = '<{p1}>; rel="prev",<{p1}>; rel="first"'
    headers = {"Link": link_template.format(p1=try_gogs_page(1))}

    page_result = GogsLister.results_simplified(json.loads(text))
    origin_urls = [repo["clone_url"] for repo in page_result]
    return text, headers, page_result, origin_urls
@pytest.fixture
def trygogs_empty_page():
    """Result page holding no repository, with Link headers pointing back
    at page 1 only (no 'next' relation)."""
    page_result = {"data": [], "ok": True}
    text = json.dumps(page_result)

    link_template = '<{p1}>; rel="prev",<{p1}>; rel="first"'
    headers = {"Link": link_template.format(p1=try_gogs_page(1))}

    origins_urls = []
    return text, headers, page_result, origins_urls
def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
    """Assert that the two collections have the same origin URLs.

    Order does not matter: both sides are reduced to sorted lists of URLs
    before being compared, so the check depends only on each origin's ``url``
    attribute and not on how origin objects compare to one another.

    Does not test last_update.

    Args:
        lister_urls: origin URLs expected from the lister pages
        scheduler_origins: origins read back from the scheduler

    Raises:
        AssertionError: if the URL collections differ
    """
    expected_urls = sorted(lister_urls)
    actual_urls = sorted(origin.url for origin in scheduler_origins)

    assert expected_urls == actual_urls
def test_gogs_full_listing(
    swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_empty_page
):
    """Run the lister over two mocked pages and check stats, the pages
    handed to ``get_origins_from_page`` and the origins recorded."""
    lister = GogsLister(
        scheduler=swh_scheduler,
        url=TRY_GOGS_URL,
        instance="try_gogs",
        page_size=3,
        api_token="secret",
    )

    # Spy on the conversion step to inspect the pages it receives.
    lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page")

    p1_text, p1_headers, p1_result, p1_origin_urls = trygogs_p1
    p2_text, p2_headers, p2_result, p2_origin_urls = trygogs_p2
    p3_text, p3_headers, _, _ = trygogs_empty_page

    mocked_pages = [
        (p1_text, p1_headers),
        (p2_text, p2_headers),
        (p3_text, p3_headers),
    ]
    for page_number, (text, headers) in enumerate(mocked_pages, start=1):
        requests_mock.get(try_gogs_page(page_number), text=text, headers=headers)

    stats = lister.run()

    assert stats.pages == 2
    assert stats.origins == 6

    expected_calls = [mocker.call(p1_result), mocker.call(p2_result)]
    lister.get_origins_from_page.assert_has_calls(expected_calls)

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins)

    assert lister.get_state_from_scheduler() is None
def test_gogs_auth_instance(
    swh_scheduler, requests_mock, trygogs_p1, trygogs_empty_page
):
    """Covers token authentication, token from credentials,
    instance inference from URL."""
    api_token = "secret"
    instance = "try.gogs.io"
    expected_auth = "token %s" % api_token
    creds = {"gogs": {instance: [{"username": "u", "password": api_token}]}}

    # 1. explicit API token
    lister = GogsLister(
        scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token=api_token, instance=instance
    )
    assert "Authorization" in lister.session.headers
    assert lister.session.headers["Authorization"].lower() == expected_auth

    # 2. neither token nor credentials: construction must fail
    with pytest.raises(ValueError, match="No credentials or API token provided"):
        GogsLister(scheduler=swh_scheduler, url=TRY_GOGS_URL, instance=instance)

    # 3. API token taken from the credentials
    lister = GogsLister(
        scheduler=swh_scheduler,
        url=TRY_GOGS_URL,
        credentials=creds,
        instance=instance,
        page_size=3,
    )
    assert "Authorization" in lister.session.headers
    assert lister.session.headers["Authorization"].lower() == expected_auth

    # NOTE(review): ``instance`` is passed explicitly above, so these checks
    # only show the value is set; inference from the URL is not exercised.
    assert lister.instance
    assert "gogs" in lister.instance

    # setup requests mocking
    p1_text, p1_headers, _, _ = trygogs_p1
    p2_text, p2_headers, _, _ = trygogs_empty_page

    first_page_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
    requests_mock.get(first_page_url, text=p1_text, headers=p1_headers)
    requests_mock.get(try_gogs_page(2), text=p2_text, headers=p2_headers)

    # now check the lister runs without error
    stats = lister.run()

    assert stats.pages == 2
    assert stats.origins == 3
@pytest.mark.parametrize("http_code", [400, 500, 502])
def test_gogs_list_http_error(swh_scheduler, requests_mock, http_code):
    """Test handling of some HTTP errors commonly encountered"""
    lister = GogsLister(scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token="secret")

    search_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
    requests_mock.get(search_url, status_code=http_code)

    with pytest.raises(HTTPError):
        lister.run()

    # Nothing must have been recorded for the failed listing.
    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id)
    assert len(listed.results) == 0
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from unittest.mock import patch
from swh.lister.pattern import ListerStats
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """The ping task of the Gogs lister completes and answers "OK"."""
    task_name = "swh.lister.gogs.tasks.ping"
    res = swh_scheduler_celery_app.send_task(task_name)
    assert res

    res.wait()
    assert res.successful()
    assert res.result == "OK"
@patch("swh.lister.gogs.tasks.GogsLister")
def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """With only ``url`` given, the task forwards ``None`` for every other
    lister argument and runs the lister once."""
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    task_kwargs = {"url": "https://try.gogs.io/api/v1/"}
    res = swh_scheduler_celery_app.send_task(
        "swh.lister.gogs.tasks.FullGogsRelister",
        kwargs=task_kwargs,
    )
    assert res
    res.wait()
    assert res.successful()

    expected_kwargs = {
        **task_kwargs,
        "instance": None,
        "api_token": None,
        "page_size": None,
    }
    lister.from_configfile.assert_called_once_with(**expected_kwargs)
    lister.run.assert_called_once_with()
@patch("swh.lister.gogs.tasks.GogsLister")
def test_full_listing_params(
    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
    """Explicit task arguments are forwarded unchanged to the lister."""
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    task_kwargs = {
        "url": "https://gogs-host.com/api/v1/",
        "instance": "foo",
        "api_token": "test",
        "page_size": 50,
    }
    res = swh_scheduler_celery_app.send_task(
        "swh.lister.gogs.tasks.FullGogsRelister",
        kwargs=task_kwargs,
    )
    assert res
    res.wait()
    assert res.successful()

    lister.from_configfile.assert_called_once_with(**task_kwargs)
    lister.run.assert_called_once_with()
......@@ -31,6 +31,10 @@ lister_args = {
"url": "https://repo1.maven.org/maven2/",
"index_url": "http://indexes/export.fld",
},
"gogs": {
"url": "https://try.gogs.io/",
"api_token": "secret",
},
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment