Skip to content
Snippets Groups Projects
Commit 4f57e844 authored by Antoine Lambert's avatar Antoine Lambert
Browse files

Use http_retry decorator from swh.core.retry module

The http_retry decorator has been moved to swh-core package in order
to ease its reuse across swh packages.
parent 1ee549fc
No related branches found
No related tags found
No related merge requests found
......@@ -318,7 +318,7 @@ then immediately stop the listing by doing an equivalent of
:py:meth:`Response.raise_for_status` from the ``requests`` library. As for rate-limiting
errors, we have a strategy of using a flexible decorator to handle the retrying for us.
It is based on the ``tenacity`` library and accessible as :py:func:`http_retry` from
:py:mod:`swh.lister.utils`.
:py:mod:`swh.core.retry`.
Pagination
^^^^^^^^^^
......
swh.core[db,github] >= 2.16.1
swh.core[db,github] >= 2.22.0
swh.scheduler >= 0.8
......@@ -9,8 +9,8 @@ import os
import pytest
from swh.core.retry import MAX_NUMBER_ATTEMPTS
from swh.lister.bitbucket.lister import BitbucketLister
from swh.lister.utils import MAX_NUMBER_ATTEMPTS
@pytest.fixture
......@@ -188,7 +188,6 @@ def test_bitbucket_lister_buggy_page(
bb_api_repositories_page1,
bb_api_repositories_page2,
):
requests_mock.get(
BitbucketLister.API_URL,
[
......
# Copyright (C) 2018-2022 The Software Heritage developers
# Copyright (C) 2018-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -14,8 +14,8 @@ from requests.exceptions import HTTPError
from requests.status_codes import codes
from tenacity.before_sleep import before_sleep_log
from swh.core.retry import http_retry, is_retryable_exception
from swh.lister.pattern import CredentialsType, Lister
from swh.lister.utils import http_retry, is_retryable_exception
from swh.scheduler.model import ListedOrigin
logger = logging.getLogger(__name__)
......
......@@ -12,11 +12,11 @@ from typing import Dict, List
import pytest
from requests.status_codes import codes
from swh.core.retry import WAIT_EXP_BASE
from swh.lister import USER_AGENT_TEMPLATE
from swh.lister.gitlab.lister import GitLabLister, _parse_id_after
from swh.lister.pattern import ListerStats
from swh.lister.tests.test_utils import assert_sleep_calls
from swh.lister.utils import WAIT_EXP_BASE
from swh.lister.tests.utils import assert_sleep_calls
logger = logging.getLogger(__name__)
......
# Copyright (C) 2022 The Software Heritage developers
# Copyright (C) 2022-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -8,9 +8,9 @@ from pathlib import Path
import iso8601
from swh.core.retry import WAIT_EXP_BASE
from swh.lister.golang.lister import GolangLister, GolangStateType
from swh.lister.tests.test_utils import assert_sleep_calls
from swh.lister.utils import WAIT_EXP_BASE
from swh.lister.tests.utils import assert_sleep_calls
# https://pkg.go.dev prefix omitted
expected_listed = [
......@@ -98,7 +98,6 @@ def _generate_responses(datadir, requests_mock):
def test_golang_lister(swh_scheduler, mocker, requests_mock, datadir):
# Exponential retries take a long time, so stub time.sleep
mocked_sleep = mocker.patch.object(GolangLister.http_request.retry, "sleep")
......
# Copyright (C) 2020-2022 The Software Heritage developers
# Copyright (C) 2020-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -14,7 +14,7 @@ from lazr.restfulclient.errors import RestfulError
from lazr.restfulclient.resource import Collection
from tenacity.before_sleep import before_sleep_log
from swh.lister.utils import http_retry, retry_if_exception
from swh.core.retry import http_retry, retry_if_exception
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
......
# Copyright (C) 2020-2022 The Software Heritage developers
# Copyright (C) 2020-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -16,12 +16,13 @@ from tenacity.before_sleep import before_sleep_log
from swh.core.config import load_from_envvar
from swh.core.github.utils import GitHubSession
from swh.core.retry import http_retry
from swh.core.utils import grouper
from swh.scheduler import get_scheduler, model
from swh.scheduler.interface import SchedulerInterface
from . import USER_AGENT_TEMPLATE
from .utils import http_retry, is_valid_origin_url
from .utils import is_valid_origin_url
logger = logging.getLogger(__name__)
......@@ -153,7 +154,6 @@ class Lister(Generic[StateType, PageType]):
@http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def http_request(self, url: str, method="GET", **kwargs) -> requests.Response:
logger.debug("Fetching URL %s with params %s", url, kwargs.get("params"))
response = self.session.request(method, url, **kwargs)
......
# Copyright (C) 2018-2021 The Software Heritage developers
# Copyright (C) 2018-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -13,7 +13,7 @@ from xmlrpc.client import Fault, ServerProxy
from tenacity.before_sleep import before_sleep_log
from swh.lister.utils import http_retry
from swh.core.retry import http_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
......
# Copyright (C) 2021-2022 The Software Heritage developers
# Copyright (C) 2021-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -13,6 +13,7 @@ from iso8601 import iso8601
import pytest
from requests.exceptions import HTTPError
from swh.core.retry import WAIT_EXP_BASE
from swh.lister import USER_AGENT_TEMPLATE
from swh.lister.sourceforge.lister import (
MAIN_SITEMAP_URL,
......@@ -20,8 +21,7 @@ from swh.lister.sourceforge.lister import (
SourceForgeLister,
SourceForgeListerState,
)
from swh.lister.tests.test_utils import assert_sleep_calls
from swh.lister.utils import WAIT_EXP_BASE
from swh.lister.tests.utils import assert_sleep_calls
# Mapping of project name to namespace
from swh.scheduler.model import ListedOrigin
......@@ -368,7 +368,6 @@ def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, m
def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir):
lister = SourceForgeLister(scheduler=swh_scheduler)
# Exponential retries take a long time, so stub time.sleep
......
# Copyright (C) 2018-2022 the Software Heritage developers
# Copyright (C) 2018-2023 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
import requests
from requests.status_codes import codes
from tenacity.wait import wait_fixed
from swh.lister.utils import MAX_NUMBER_ATTEMPTS, WAIT_EXP_BASE, http_retry, split_range
from swh.lister.utils import split_range
@pytest.mark.parametrize(
......@@ -41,90 +38,3 @@ def test_split_range_errors(total_pages, nb_pages):
for total_pages, nb_pages in [(None, 1), (100, None)]:
with pytest.raises(TypeError):
next(split_range(total_pages, nb_pages))
# Endpoint registered with requests_mock by the retry tests of this module.
TEST_URL = "https://example.og/api/repositories"


@http_retry()
def make_request():
    """
    Fetch ``TEST_URL`` under the default ``http_retry`` configuration.

    The response is returned when its status is successful; otherwise the
    exception raised by ``raise_for_status`` propagates to the retry decorator.
    """
    resp = requests.get(TEST_URL)
    resp.raise_for_status()
    return resp
def assert_sleep_calls(mocker, mock_sleep, sleep_params):
    """
    Check the calls recorded by ``mock_sleep`` against ``sleep_params``.

    One ``mocker.call(param)`` object is built per value of ``sleep_params``
    and the resulting list is handed to ``mock_sleep.assert_has_calls``.
    """
    expected_calls = list(map(mocker.call, sleep_params))
    mock_sleep.assert_has_calls(expected_calls)
@pytest.mark.parametrize(
    "status_code",
    [
        codes.too_many_requests,
        codes.internal_server_error,
        codes.bad_gateway,
        codes.service_unavailable,
    ],
)
def test_http_retry(requests_mock, mocker, status_code):
    """
    Two error responses with the parametrized status, then a 200 with JSON.

    The request helper must eventually return the successful response, and the
    stubbed sleep must have been called with 1 and then ``WAIT_EXP_BASE``.
    """
    expected_payload = {"result": {}}
    failure = {"status_code": status_code}
    success = {"status_code": codes.ok, "json": expected_payload}
    requests_mock.get(TEST_URL, [failure, failure, success])

    # Stub the sleep hook of the retry controller so the test does not wait.
    sleep_stub = mocker.patch.object(make_request.retry, "sleep")

    result = make_request()

    assert_sleep_calls(mocker, sleep_stub, [1, WAIT_EXP_BASE])
    assert result.json() == expected_payload
def test_http_retry_max_attemps(requests_mock, mocker):
    """
    Every one of the ``MAX_NUMBER_ATTEMPTS`` responses is a 429.

    The HTTPError must reach the caller with the 429 response attached, and
    the stubbed sleep must have been called with ``WAIT_EXP_BASE**i`` for each
    ``i`` below ``MAX_NUMBER_ATTEMPTS - 1``.
    """
    throttled = {"status_code": codes.too_many_requests}
    requests_mock.get(TEST_URL, [throttled] * (MAX_NUMBER_ATTEMPTS))

    # Stub the sleep hook of the retry controller so the test does not wait.
    sleep_stub = mocker.patch.object(make_request.retry, "sleep")

    with pytest.raises(requests.exceptions.HTTPError) as exc_info:
        make_request()

    assert exc_info.value.response.status_code == codes.too_many_requests

    expected_waits = [
        float(WAIT_EXP_BASE**attempt) for attempt in range(MAX_NUMBER_ATTEMPTS - 1)
    ]
    assert_sleep_calls(mocker, sleep_stub, expected_waits)
@http_retry(wait=wait_fixed(WAIT_EXP_BASE))
def make_request_wait_fixed():
    """
    Fetch ``TEST_URL`` with ``http_retry`` configured to wait a fixed
    ``WAIT_EXP_BASE`` between attempts instead of the default wait strategy.
    """
    resp = requests.get(TEST_URL)
    resp.raise_for_status()
    return resp
def test_http_retry_wait_fixed(requests_mock, mocker):
    """
    Two 429 responses followed by a 200, using the fixed-wait request helper.

    The stubbed sleep must have been called twice with ``WAIT_EXP_BASE``.
    """
    throttled = {"status_code": codes.too_many_requests}
    requests_mock.get(TEST_URL, [throttled, throttled, {"status_code": codes.ok}])

    # Stub the sleep hook of the retry controller so the test does not wait.
    sleep_stub = mocker.patch.object(make_request_wait_fixed.retry, "sleep")

    make_request_wait_fixed()

    assert_sleep_calls(mocker, sleep_stub, [WAIT_EXP_BASE] * 2)
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def assert_sleep_calls(mocker, mock_sleep, sleep_params):
    """
    Test helper checking the calls recorded by a stubbed sleep function.

    Each value of ``sleep_params`` is wrapped with ``mocker.call`` and the
    list of wrapped values is passed to ``mock_sleep.assert_has_calls``.
    """
    wrapped_params = []
    for sleep_value in sleep_params:
        wrapped_params.append(mocker.call(sleep_value))
    mock_sleep.assert_has_calls(wrapped_params)
# Copyright (C) 2018-2022 the Software Heritage developers
# Copyright (C) 2018-2023 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Callable, Iterator, Optional, Tuple
from typing import Iterator, Optional, Tuple
import urllib.parse
from requests.exceptions import ConnectionError, HTTPError
from requests.status_codes import codes
from tenacity import retry as tenacity_retry
from tenacity.stop import stop_after_attempt
from tenacity.wait import wait_exponential
def split_range(total_pages: int, nb_pages: int) -> Iterator[Tuple[int, int]]:
"""Split `total_pages` into mostly `nb_pages` ranges. In some cases, the last range can
......@@ -36,84 +30,6 @@ def split_range(total_pages: int, nb_pages: int) -> Iterator[Tuple[int, int]]:
yield index, total_pages
def is_throttling_exception(e: Exception) -> bool:
    """
    Checks if an exception is a requests.exceptions.HTTPError for
    a response with status code 429 (too many requests).

    An HTTPError that carries no response object is reported as not being a
    throttling error instead of failing with AttributeError.

    Args:
        e: the exception to inspect

    Returns:
        True only for an HTTPError whose attached response has status 429
    """
    if not isinstance(e, HTTPError):
        return False
    response = e.response
    # Compare with None explicitly: a requests Response evaluates to False
    # for 4xx/5xx statuses, so a truthiness test would reject every 429.
    return response is not None and response.status_code == codes.too_many_requests
def is_retryable_exception(e: Exception) -> bool:
    """
    Checks if an exception is worth retrying (connection, throttling or a server error).

    An HTTPError that carries no response object is reported as not retryable
    instead of failing with AttributeError.

    Args:
        e: the exception to inspect

    Returns:
        True for a ConnectionError, or for an HTTPError whose attached response
        has status 429 or a status greater than or equal to 500
    """
    if isinstance(e, ConnectionError):
        return True
    # Without an attached response there is no status code to classify.
    # Compare with None explicitly: a requests Response is falsy for 4xx/5xx.
    if not isinstance(e, HTTPError) or e.response is None:
        return False
    return is_throttling_exception(e) or e.response.status_code >= 500
def retry_if_exception(retry_state, predicate: Callable[[Exception], bool]) -> bool:
    """
    Custom tenacity retry predicate delegating the decision to ``predicate``.

    Returns ``False`` when the outcome of ``retry_state`` did not fail;
    otherwise returns what ``predicate`` returns for the outcome's exception.
    """
    outcome = retry_state.outcome
    if not outcome.failed:
        return False
    return predicate(outcome.exception())
def retry_policy_generic(retry_state) -> bool:
    """
    Default tenacity retry predicate for failed requests.

    Delegates to ``retry_if_exception`` with ``is_retryable_exception`` as the
    predicate, so the decision to retry is the one made by that function:

    - ConnectionError
    - Server errors (status >= 500)
    - Throttling errors (status == 429)

    This does not handle 404, 403 or other status codes.
    """
    return retry_if_exception(retry_state, predicate=is_retryable_exception)
# Base passed as ``exp_base`` to the default exponential ``wait`` of ``http_retry``
WAIT_EXP_BASE = 10
# Attempt count passed to the default ``stop`` condition of ``http_retry``
MAX_NUMBER_ATTEMPTS = 5
def http_retry(
    retry=retry_policy_generic,
    wait=wait_exponential(exp_base=WAIT_EXP_BASE),
    stop=stop_after_attempt(max_attempt_number=MAX_NUMBER_ATTEMPTS),
    **retry_args,
):
    """
    Decorator based on `tenacity` for retrying a function possibly raising
    requests.exceptions.HTTPError for status code 429 (too many requests).

    It provides a default configuration that should work properly in most
    cases but all `tenacity.retry` parameters can also be overridden in client
    code.

    When the maximum number of attempts is reached, the HTTPError exception
    will then be reraised. This corresponds to ``reraise=True``, which is the
    default and can be overridden through ``retry_args``.

    Args:
        retry: function defining request retry condition (default to
            ``retry_policy_generic``)
            https://tenacity.readthedocs.io/en/latest/#whether-to-retry
        wait: function defining wait strategy before retrying (default to
            exponential backoff with base ``WAIT_EXP_BASE``)
            https://tenacity.readthedocs.io/en/latest/#waiting-before-retrying
        stop: function defining when to stop retrying (default after
            ``MAX_NUMBER_ATTEMPTS`` attempts)
            https://tenacity.readthedocs.io/en/latest/#stopping
        retry_args: any other keyword argument accepted by `tenacity.retry`

    Returns:
        the decorator produced by `tenacity.retry` for this configuration
    """
    # ``reraise`` was previously hard-coded next to ``**retry_args``, so a
    # caller supplying it got a TypeError for a duplicated keyword argument.
    retry_args.setdefault("reraise", True)
    return tenacity_retry(retry=retry, wait=wait, stop=stop, **retry_args)
def is_valid_origin_url(url: Optional[str]) -> bool:
"""Returns whether the given string is a valid origin URL.
This excludes Git SSH URLs and pseudo-URLs (eg. ``ssh://git@example.org:foo``
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment