Skip to content
Snippets Groups Projects
Verified Commit a4560551 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

lister: Allow lister to build url out of the instance parameter

This pushes the rather elementary logic within the lister's scope. This will simplify
and unify cli call between lister and scheduler clis. This will also allow to reduce
erroneous operations which can happen for example in the add-forge-now.

With the following, we will only have to provide the type and the instance, then
everything will be scheduled properly.

Refs. swh/devel/swh-lister#4693
parent 596e8c6c
No related branches found
No related tags found
No related merge requests found
......@@ -99,7 +99,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
def __init__(
self,
scheduler,
url: str,
url: Optional[str] = None,
name: Optional[str] = "gitlab",
instance: Optional[str] = None,
credentials: Optional[CredentialsType] = None,
......@@ -113,7 +113,8 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
self.LISTER_NAME = name
super().__init__(
scheduler=scheduler,
url=url.rstrip("/"),
# if url is empty, it will be built in `build_url` method
url=url.rstrip("/") if url is not None else None,
instance=instance,
credentials=credentials,
max_origins_per_page=max_origins_per_page,
......@@ -138,6 +139,11 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
if api_token:
self.session.headers["Authorization"] = f"Bearer {api_token}"
def build_url(self, instance: str) -> str:
"""Build gitlab api url."""
prefix_url = super().build_url(instance)
return f"{prefix_url}/api/v4"
def state_from_dict(self, d: Dict[str, Any]) -> GitLabListerState:
return GitLabListerState(**d)
......
......@@ -22,17 +22,30 @@ logger = logging.getLogger(__name__)
def api_url(instance: str) -> str:
return f"https://{instance}/api/v4/"
return f"https://{instance}/api/v4"
def _match_request(request, lister_name="gitlab"):
return request.headers.get("User-Agent") == USER_AGENT_TEMPLATE % lister_name
def test_lister_gitlab_fail_to_instantiate(swh_scheduler):
"""Build a lister without its url nor its instance should raise"""
# while it's fine with either the url or the instance
assert GitLabLister(swh_scheduler, url="https://gitlab.com/api/v4") is not None
assert GitLabLister(swh_scheduler, instance="gitlab.fr") is not None
# It will raise without any of those
with pytest.raises(ValueError, match="'url' or 'instance'"):
GitLabLister(swh_scheduler)
def test_lister_gitlab(datadir, swh_scheduler, requests_mock):
"""Gitlab lister supports full listing"""
instance = "gitlab.com"
lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance)
lister = GitLabLister(swh_scheduler, instance=instance)
assert lister.url == api_url(instance)
response = gitlab_page_response(datadir, instance, 1)
......
# Copyright (C) 2022 The Software Heritage developers
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -70,7 +70,7 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
def __init__(
self,
scheduler: SchedulerInterface,
url: str,
url: Optional[str] = None,
instance: Optional[str] = None,
api_token: Optional[str] = None,
page_size: int = 50,
......@@ -111,6 +111,11 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
"No authentication token set in configuration, using anonymous mode"
)
def build_url(self, instance: str) -> str:
"Build gogs url out of the instance."
prefix_url = super().build_url(instance)
return f"{prefix_url}/api/v1/"
def state_from_dict(self, d: Dict[str, Any]) -> GogsListerState:
return GogsListerState(**d)
......
# Copyright (C) 2022 The Software Heritage developers
# Copyright (C) 2022-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -99,13 +99,23 @@ def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedO
assert set(lister_urls) == {origin.url for origin in scheduler_origins}
def test_lister_gogs_fail_to_instantiate(swh_scheduler):
"""Build a lister without its url nor its instance should raise"""
# while it's fine with either the url or the instance
assert GogsLister(swh_scheduler, url="https://try.gogs.io/api/v1") is not None
assert GogsLister(swh_scheduler, instance="try.gogs.io") is not None
# It will raise without any of those
with pytest.raises(ValueError, match="'url' or 'instance'"):
GogsLister(swh_scheduler)
def test_gogs_full_listing(
swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_p3_last
):
kwargs = dict(
url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret"
)
kwargs = dict(instance="try.gogs.io", page_size=3, api_token="secret")
lister = GogsLister(scheduler=swh_scheduler, **kwargs)
assert lister.url == TRY_GOGS_URL
lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page")
......
......@@ -104,7 +104,7 @@ class Lister(Generic[StateType, PageType]):
def __init__(
self,
scheduler: SchedulerInterface,
url: str,
url: Optional[str] = None,
instance: Optional[str] = None,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
......@@ -115,11 +115,23 @@ class Lister(Generic[StateType, PageType]):
if not self.LISTER_NAME:
raise ValueError("Must set the LISTER_NAME attribute on Lister classes")
self.url = url
self.url: str
if url is not None:
# Retro-compability with lister already instantiated out of a provided url
self.url = url
elif url is None and instance is not None:
# Allow lister to be instantiated simply with their type and instance
# (as in their domain like "gitlab.com", "git.garbaye.fr", ...)
self.url = self.build_url(instance)
else:
raise ValueError(
"Either 'url' or 'instance' parameter should be provided for listing."
)
if instance is not None:
self.instance = instance
else:
self.instance = urlparse(url).netloc
self.instance = urlparse(self.url).netloc
self.scheduler = scheduler
......@@ -152,6 +164,25 @@ class Lister(Generic[StateType, PageType]):
self.max_origins_per_page = max_origins_per_page
self.enable_origins = enable_origins
def build_url(self, instance: str) -> str:
"""Optionally build the forge url to list. When the url is not provided in the
constructor, this method is called. This should compute the actual url to use to
list the forge.
This is particularly useful for forges which uses an api. This simplifies the
cli calls to use. They should then only provide the instance (its domain).
For example:
- gitlab: https://{instance}/api/v4
- gitea: https://{instance}/api/v1
- ...
"""
scheme = urlparse(instance).scheme
if scheme:
raise ValueError("Instance should only be a net location.")
return f"https://{instance}"
@http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def http_request(self, url: str, method="GET", **kwargs) -> requests.Response:
logger.debug("Fetching URL %s with params %s", url, kwargs.get("params"))
......
......@@ -24,6 +24,24 @@ class InstantiableLister(pattern.Lister[StateType, PageType]):
return d
def test_instantiation_fail_to_instantiate(swh_scheduler):
"""Instantiation without proper url or instance will raise."""
# While instantiating with either a url or instance is fine...
InstantiableLister(scheduler=swh_scheduler, url="https://example.com")
InstantiableLister(scheduler=swh_scheduler, instance="example.com")
# ... Instantiating will when:
# - no instance nor url is provided to constructor
with pytest.raises(ValueError, match="'url' or 'instance"):
InstantiableLister(
scheduler=swh_scheduler,
)
# - an instance which is not a net location is provided
with pytest.raises(ValueError, match="net location"):
InstantiableLister(scheduler=swh_scheduler, instance="http://example.com")
def test_instantiation(swh_scheduler):
lister = InstantiableLister(
scheduler=swh_scheduler, url="https://example.com", instance="example.com"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment