Skip to content
Snippets Groups Projects
Commit 323e2774 authored by Antoine Lambert's avatar Antoine Lambert
Browse files

gitea, gogs: Ensure query parameters are not duplicated in API URLs

Gitea API return next pagination link with all query parameters provided
to an API request.

As we were also passing a dict of fixed query parameters to the page_request
method, some query parameters ended up having multiple instances in the URL
for fetching a new page of repositories data. So each time a new page was
requested, new instances of these parameters were appended to the URL which
could result in a really long URL if the number of pages to retrieve is high
and make the request fail.

Also remove a debug log already present in http_request method.
parent aaae1a6b
No related branches found
No related tags found
1 merge request!527gitea, gogs: Ensure query parameters are not duplicated in API URLs
Pipeline #9224 failed
# Copyright (C) 2023 The Software Heritage developers
# Copyright (C) 2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -45,7 +45,6 @@ def _parse_page_id(url: Optional[str]) -> int:
class GogsLister(Lister[GogsListerState, GogsListerPage]):
"""List origins from the Gogs
Gogs API documentation: https://github.com/gogs/docs-api
......@@ -125,10 +124,8 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
return asdict(state)
def page_request(
self, url: str, params: Dict[str, Any]
self, url: str, params: Optional[Dict[str, Any]] = None
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
logger.debug("Fetching URL %s with params %s", url, params)
try:
response = self.http_request(url, params=params)
except HTTPError as http_error:
......@@ -177,7 +174,12 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
yield GogsListerPage(repos=repos, next_link=next_link)
if next_link is not None:
body, links = self.page_request(next_link, self.query_params)
parsed_url = urlparse(next_link)
query_params = {**self.query_params, **parse_qs(parsed_url.query)}
next_link = parsed_url._replace(
query=urlencode(query_params, doseq=True)
).geturl()
body, links = self.page_request(next_link)
def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]:
"""Convert a page of Gogs repositories into a list of ListedOrigins"""
......
# Copyright (C) 2022-2023 The Software Heritage developers
# Copyright (C) 2022-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -7,6 +7,7 @@ import json
from pathlib import Path
from typing import List
from unittest.mock import Mock
from urllib.parse import parse_qs, urlparse
import pytest
from requests import HTTPError
......@@ -145,6 +146,13 @@ def test_gogs_full_listing(
lister.get_state_from_scheduler().last_seen_next_link == P3
) # P3 didn't provide any next link so it remains the last_seen_next_link
# check each query parameter has a single instance in Gogs API URLs
for request in requests_mock.request_history:
assert all(
len(values) == 1
for values in parse_qs(urlparse(request.url).query).values()
)
def test_gogs_auth_instance(
swh_scheduler, requests_mock, trygogs_p1, trygogs_p2, trygogs_p3_empty
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment