Skip to content
Snippets Groups Projects
Commit bcbb554e authored by Antoine Lambert's avatar Antoine Lambert
Browse files

package/utils: Add debug log and throttling retry to api_info

This function is used in multiple package loaders so add a throttling
retry policy and debug log about fetched URL.
parent d471cca9
No related branches found
No related tags found
1 merge request!316package/utils: Add debug log and throttling retry to api_info
......@@ -13,7 +13,6 @@ from urllib.parse import quote
import pytest
from requests.exceptions import HTTPError
from swh.loader.exception import NotFound
import swh.loader.package
from swh.loader.package.utils import api_info, download, release_name
......@@ -213,11 +212,11 @@ def test_api_info_failure(requests_mock):
status_code = 400
requests_mock.get(url, status_code=status_code)
with pytest.raises(NotFound) as e0:
with pytest.raises(
HTTPError, match=f"{status_code} Client Error: None for url: {url}"
):
api_info(url)
assert e0.value.args[0] == "Fail to query '%s'. Reason: %s" % (url, status_code)
def test_api_info(requests_mock):
"""Fetching json info from pypi project should be ok"""
......@@ -271,3 +270,39 @@ def test_download_retry_reraise(mocker, requests_mock, tmp_path):
with pytest.raises(HTTPError):
_check_download_ok(url, dest=str(tmp_path))
@pytest.fixture(autouse=True)
def mock_api_info_retry_sleep(mocker):
mocker.patch.object(api_info.retry, "sleep")
def test_api_info_retry(mocker, requests_mock, tmp_path):
url = "https://example.org/api/endpoint"
json_data = {"foo": "bar"}
requests_mock.get(
url,
[
{"status_code": 429},
{"status_code": 429},
{
"json": json_data,
"status_code": 200,
},
],
)
assert json.loads(api_info(url)) == json_data
def test_api_info_retry_reraise(mocker, requests_mock, tmp_path):
url = "https://example.org/api/endpoint"
requests_mock.get(
url,
[{"status_code": 429}] * 5,
)
with pytest.raises(HTTPError, match=f"429 Client Error: None for url: {url}"):
api_info(url)
......@@ -34,26 +34,6 @@ DOWNLOAD_HASHES = set(["sha1", "sha256", "length"])
EMPTY_AUTHOR = Person.from_fullname(b"")
def api_info(url: str, **extra_params) -> bytes:
"""Basic api client to retrieve information on project. This deals with
fetching json metadata about pypi projects.
Args:
url (str): The api url (e.g PyPI, npm, etc...)
Raises:
NotFound in case of query failures (for some reasons: 404, ...)
Returns:
The associated response's information
"""
response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
if response.status_code != 200:
raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
return response.content
def _content_disposition_filename(header: str) -> Optional[str]:
fname = None
fnames = re.findall(r"filename[\*]?=([^;]+)", header)
......@@ -81,13 +61,16 @@ def _retry_if_throttling(retry_state) -> bool:
return False
@retry(
throttling_retry = retry(
retry=_retry_if_throttling,
wait=wait_exponential(exp_base=10),
stop=stop_after_attempt(max_attempt_number=5),
before_sleep=before_sleep_log(logger, logging.WARNING),
reraise=True,
)
@throttling_retry
def download(
url: str,
dest: str,
......@@ -181,6 +164,29 @@ def download(
return filepath, extrinsic_metadata
@throttling_retry
def api_info(url: str, **extra_params) -> bytes:
"""Basic api client to retrieve information, typically JSON metadata,
on software package.
Args:
url (str): The api url (e.g PyPI, npm, etc...)
Raises:
NotFound in case of query failures (for some reasons: 404, ...)
Returns:
The associated response's information
"""
logger.debug("Fetching %s", url)
response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
if response.status_code == 404:
raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
response.raise_for_status()
return response.content
def release_name(version: str, filename: Optional[str] = None) -> str:
if filename:
return "releases/%s/%s" % (version, filename)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment