Skip to content
Snippets Groups Projects
Verified Commit 9fa9f842 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

Extract reusable github test fixtures into their own pytest_plugin

This also:
- sandboxes the new module into its own package to simplify packaging

Related to T4232
parent e77c68ab
No related branches found
Tags v2.6.0
No related merge requests found
TEST_DIRS := ./swh/core/api/tests ./swh/core/db/tests ./swh/core/tests
TEST_DIRS := ./swh/core/api/tests ./swh/core/db/tests ./swh/core/tests ./swh/core/github/tests
......@@ -6,6 +6,8 @@ import pytest
settings.register_profile("fast", max_examples=5, deadline=5000)
settings.register_profile("slow", max_examples=20, deadline=5000)
pytest_plugins = ["swh.core.github.pytest_plugin"]
@pytest.fixture
def swhmain():
......
# requirements for swh.core.github
requests
tenacity
......@@ -6,4 +6,4 @@ flask
iso8601
msgpack >= 1.0.0
requests
tenacity
......@@ -57,6 +57,7 @@ setup(
"logging": parse_requirements("logging"),
"db": parse_requirements("db", "db-pytestplugin"),
"http": parse_requirements("http"),
"github": parse_requirements("github"),
# kitchen sink, please do not use
"testing": parse_requirements(
"test", "db", "db-pytestplugin", "http", "logging"
......
# Copyright (C) 2022 The Software Heritage developers
# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import time
from typing import Dict, Iterator, List, Optional, Union
import pytest
import requests_mock
from swh.core.github.utils import (
GitHubSession,
_sanitize_github_url,
_url_github_api,
_url_github_html,
get_canonical_github_origin_url,
)
KNOWN_GH_REPO = "https://github.com/user/repo"
@pytest.mark.parametrize(
    "user_repo, expected_url",
    [
        ("user/repo.git", KNOWN_GH_REPO),
        ("user/repo.git/", KNOWN_GH_REPO),
        ("user/repo/", KNOWN_GH_REPO),
        ("user/repo", KNOWN_GH_REPO),
        ("user/repo/.git", KNOWN_GH_REPO),
        # edge cases
        ("https://github.com/unknown-page", None),  # unknown gh origin returns None
        ("user/repo/with/some/deps", None),  # url kind is not dealt with for now
    ],
)
def test_get_canonical_github_origin_url(user_repo, expected_url, requests_mock):
    """Check canonical origin resolution against a mocked GitHub API.

    When ``expected_url`` is set, the mocked API answers 200 with an
    ``html_url`` payload; otherwise it answers 404 with an empty body and the
    lookup is expected to yield None.
    """
    html_url = _url_github_html(user_repo)
    api_url = _url_github_api(_sanitize_github_url(user_repo))

    origin_exists = expected_url is not None
    mocked_reply = {
        "status_code": 200 if origin_exists else 404,
        "json": {"html_url": _sanitize_github_url(html_url)} if origin_exists else {},
    }
    requests_mock.get(api_url, [mocked_reply])

    assert get_canonical_github_origin_url(html_url) == expected_url
def test_get_canonical_github_origin_url_not_gh_origin():
    """A URL that is not a GitHub origin must be handed back unchanged."""
    non_github_url = "https://example.org"
    assert get_canonical_github_origin_url(non_github_url) == non_github_url
HTTP_GITHUB_API_URL = "https://api.github.com/repositories"
def fake_time_sleep(duration: float, sleep_calls: Optional[List[float]] = None):
"""Record calls to time.sleep in the sleep_calls list"""
"""Record calls to time.sleep in the sleep_calls list."""
if duration < 0:
raise ValueError("Can't sleep for a negative amount of time!")
if sleep_calls is not None:
......@@ -136,9 +91,6 @@ def github_repo(i: int) -> Dict[str, Union[int, str]]:
return repo
HTTP_GH_API_URL = "https://api.github.com/repositories"
def github_response_callback(
request: requests_mock.request._RequestObjectProxy,
context: requests_mock.response._Context,
......@@ -162,7 +114,7 @@ def github_response_callback(
if next_page < origin_count:
# the first id for the next page is within our origin count; add a Link
# header to the response
next_url = f"{HTTP_GH_API_URL}?per_page={page_size}&since={next_page}"
next_url = f"{HTTP_GITHUB_API_URL}?per_page={page_size}&since={next_page}"
context.headers["Link"] = f"<{next_url}>; rel=next"
return [github_repo(i) for i in range(since + 1, min(next_page, origin_count) + 1)]
......@@ -174,25 +126,25 @@ def requests_ratelimited(
num_ratelimit: Optional[int],
ratelimit_reset: Optional[int],
) -> Iterator[requests_mock.Mocker]:
"""Mock requests to the GitHub API, returning a rate-limiting status code
after `num_before_ratelimit` requests.
"""Mock requests to the GitHub API, returning a rate-limiting status code after
`num_before_ratelimit` requests.
GitHub does inconsistent rate-limiting:
- Anonymous requests return a 403 status code
- Authenticated requests return a 429 status code, with an
X-Ratelimit-Reset header.
- Anonymous requests return a 403 status code
- Authenticated requests return a 429 status code, with an X-Ratelimit-Reset header.
This fixture takes multiple arguments (which can be overridden with a
:func:`pytest.mark.parametrize` parameter):
- num_before_ratelimit: the global number of requests until the
ratelimit triggers
- num_ratelimit: the number of requests that return a
rate-limited response.
- ratelimit_reset: the timestamp returned in X-Ratelimit-Reset if the
request is authenticated.
- num_before_ratelimit: the global number of requests until the ratelimit triggers
- num_ratelimit: the number of requests that return a rate-limited response.
- ratelimit_reset: the timestamp returned in X-Ratelimit-Reset if the request is
authenticated.
The default values set in the previous fixtures make all requests return a rate
limit response.
"""
current_request = 0
......@@ -208,7 +160,7 @@ def requests_ratelimited(
return github_response_callback(request, context)
with requests_mock.Mocker() as mock:
mock.get(HTTP_GH_API_URL, json=response_callback)
mock.get(HTTP_GITHUB_API_URL, json=response_callback)
yield mock
......@@ -230,108 +182,3 @@ def all_tokens(github_credentials) -> List[str]:
"""Return the list of tokens matching the static credential"""
return [t.get("token", t.get("password")) for t in github_credentials]
def test_github_session_anonymous_session():
    """A GitHubSession built without credentials is anonymous.

    Also checks that the underlying session carries the expected ``Accept``
    header and the ``User-Agent`` given at construction time.
    """
    # Must be a plain string, not a one-element tuple: HTTP header values are
    # expected to be str/bytes.
    user_agent = "GitHub Session Test"
    github_session = GitHubSession(user_agent=user_agent)
    assert github_session.anonymous is True

    actual_headers = github_session.session.headers
    assert actual_headers["Accept"] == "application/vnd.github.v3+json"
    assert actual_headers["User-Agent"] == user_agent
@pytest.mark.parametrize(
    "num_ratelimit", [1]  # return a single rate-limit response, then continue
)
def test_github_session_ratelimit_once_recovery(
    caplog,
    requests_ratelimited,
    num_ratelimit,
    monkeypatch_sleep_calls,
    github_credentials,
):
    """A single rate-limited response must not prevent a successful request.

    The debug log is inspected to count how many authentication tokens were
    used, and the recorded sleep calls are checked as well.
    """
    caplog.set_level(logging.DEBUG, "swh.core.github.utils")

    session = GitHubSession(
        credentials=github_credentials, user_agent="GitHub Session Test"
    )
    response = session.request(f"{HTTP_GH_API_URL}?per_page=1000&since=10")
    assert response.status_code == 200

    # first logging argument of every "Using authentication token" record
    token_users = [
        record.args[0]
        for record in caplog.records
        if "Using authentication token" in record.message
    ]

    # check that we used one more token than we saw rate limited requests
    assert len(token_users) == 1 + num_ratelimit

    # check that we slept for one second between our token uses
    assert monkeypatch_sleep_calls == [1]
def test_github_session_authenticated_credentials(
    caplog, github_credentials, all_tokens
):
    """With credentials, the session is authenticated and sends a token.

    Verifies the session is not anonymous, starts at token index 0, holds the
    same credentials as the fixture (compared after sorting by username), and
    sets an ``Authorization`` header built from one of the known tokens.
    """
    caplog.set_level(logging.DEBUG, "swh.core.github.utils")

    session = GitHubSession("GitHub Session Test", credentials=github_credentials)

    assert session.anonymous is False
    assert session.token_index == 0

    credentials_by_username = sorted(
        session.credentials, key=lambda cred: cred["username"]
    )
    assert credentials_by_username == github_credentials

    accepted_headers = [f"token {token}" for token in all_tokens]
    assert session.session.headers["Authorization"] in accepted_headers
@pytest.mark.parametrize(
    # Do 5 successful requests, return 6 ratelimits (to exhaust the credentials) with a
    # set value for X-Ratelimit-Reset, then resume listing successfully.
    "num_before_ratelimit, num_ratelimit, ratelimit_reset",
    [(5, 6, 123456)],
)
def test_github_session_ratelimit_reset_sleep(
    caplog,
    requests_ratelimited,
    monkeypatch_sleep_calls,
    num_before_ratelimit,
    num_ratelimit,
    ratelimit_reset,
    github_credentials,
):
    """Rate limiting with authentication tokens is handled by sleeping.

    Checks the exact sequence of recorded sleep calls and that an INFO record
    announcing the exhaustion of all tokens was logged.
    """
    caplog.set_level(logging.DEBUG, "swh.core.github.utils")

    session = GitHubSession(
        credentials=github_credentials, user_agent="GitHub Session Test"
    )

    api_url = f"{HTTP_GH_API_URL}?per_page=1000&since=10"
    for _ in range(num_ratelimit):
        session.request(api_url)

    # We sleep 1 second every time we change credentials, then we sleep until
    # ratelimit_reset + 1
    expected_sleep_calls = [1] * len(github_credentials) + [ratelimit_reset + 1]
    assert monkeypatch_sleep_calls == expected_sleep_calls

    found_exhaustion_message = any(
        record.levelname == "INFO"
        and "Rate limits exhausted for all tokens" in record.message
        for record in caplog.records
    )
    assert found_exhaustion_message is True
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import pytest
from swh.core.github.pytest_plugin import HTTP_GITHUB_API_URL
from swh.core.github.utils import (
GitHubSession,
_sanitize_github_url,
_url_github_api,
_url_github_html,
get_canonical_github_origin_url,
)
KNOWN_GH_REPO = "https://github.com/user/repo"
@pytest.mark.parametrize(
    "user_repo, expected_url",
    [
        ("user/repo.git", KNOWN_GH_REPO),
        ("user/repo.git/", KNOWN_GH_REPO),
        ("user/repo/", KNOWN_GH_REPO),
        ("user/repo", KNOWN_GH_REPO),
        ("user/repo/.git", KNOWN_GH_REPO),
        # edge cases
        ("https://github.com/unknown-page", None),  # unknown gh origin returns None
        ("user/repo/with/some/deps", None),  # url kind is not dealt with for now
    ],
)
def test_get_canonical_github_origin_url(user_repo, expected_url, requests_mock):
    """Check canonical origin resolution against a mocked GitHub API.

    When ``expected_url`` is set, the mocked API answers 200 with an
    ``html_url`` payload; otherwise it answers 404 with an empty body and the
    lookup is expected to yield None.
    """
    html_url = _url_github_html(user_repo)
    api_url = _url_github_api(_sanitize_github_url(user_repo))

    origin_exists = expected_url is not None
    mocked_reply = {
        "status_code": 200 if origin_exists else 404,
        "json": {"html_url": _sanitize_github_url(html_url)} if origin_exists else {},
    }
    requests_mock.get(api_url, [mocked_reply])

    assert get_canonical_github_origin_url(html_url) == expected_url
def test_get_canonical_github_origin_url_not_gh_origin():
    """A URL that is not a GitHub origin must be handed back unchanged."""
    non_github_url = "https://example.org"
    assert get_canonical_github_origin_url(non_github_url) == non_github_url
def test_github_session_anonymous_session():
    """A GitHubSession built without credentials is anonymous.

    Also checks that the underlying session carries the expected ``Accept``
    header and the ``User-Agent`` given at construction time.
    """
    # Must be a plain string, not a one-element tuple: HTTP header values are
    # expected to be str/bytes.
    user_agent = "GitHub Session Test"
    github_session = GitHubSession(user_agent=user_agent)
    assert github_session.anonymous is True

    actual_headers = github_session.session.headers
    assert actual_headers["Accept"] == "application/vnd.github.v3+json"
    assert actual_headers["User-Agent"] == user_agent
@pytest.mark.parametrize(
    "num_ratelimit", [1]  # return a single rate-limit response, then continue
)
def test_github_session_ratelimit_once_recovery(
    caplog,
    requests_ratelimited,
    num_ratelimit,
    monkeypatch_sleep_calls,
    github_credentials,
):
    """A single rate-limited response must not prevent a successful request.

    The debug log is inspected to count how many authentication tokens were
    used, and the recorded sleep calls are checked as well.
    """
    caplog.set_level(logging.DEBUG, "swh.core.github.utils")

    session = GitHubSession(
        credentials=github_credentials, user_agent="GitHub Session Test"
    )
    response = session.request(f"{HTTP_GITHUB_API_URL}?per_page=1000&since=10")
    assert response.status_code == 200

    # first logging argument of every "Using authentication token" record
    token_users = [
        record.args[0]
        for record in caplog.records
        if "Using authentication token" in record.message
    ]

    # check that we used one more token than we saw rate limited requests
    assert len(token_users) == 1 + num_ratelimit

    # check that we slept for one second between our token uses
    assert monkeypatch_sleep_calls == [1]
def test_github_session_authenticated_credentials(
    caplog, github_credentials, all_tokens
):
    """With credentials, the session is authenticated and sends a token.

    Verifies the session is not anonymous, starts at token index 0, holds the
    same credentials as the fixture (compared after sorting by username), and
    sets an ``Authorization`` header built from one of the known tokens.
    """
    caplog.set_level(logging.DEBUG, "swh.core.github.utils")

    session = GitHubSession("GitHub Session Test", credentials=github_credentials)

    assert session.anonymous is False
    assert session.token_index == 0

    credentials_by_username = sorted(
        session.credentials, key=lambda cred: cred["username"]
    )
    assert credentials_by_username == github_credentials

    accepted_headers = [f"token {token}" for token in all_tokens]
    assert session.session.headers["Authorization"] in accepted_headers
@pytest.mark.parametrize(
    # Do 5 successful requests, return 6 ratelimits (to exhaust the credentials) with a
    # set value for X-Ratelimit-Reset, then resume listing successfully.
    "num_before_ratelimit, num_ratelimit, ratelimit_reset",
    [(5, 6, 123456)],
)
def test_github_session_ratelimit_reset_sleep(
    caplog,
    requests_ratelimited,
    monkeypatch_sleep_calls,
    num_before_ratelimit,
    num_ratelimit,
    ratelimit_reset,
    github_credentials,
):
    """Rate limiting with authentication tokens is handled by sleeping.

    Checks the exact sequence of recorded sleep calls and that an INFO record
    announcing the exhaustion of all tokens was logged.
    """
    caplog.set_level(logging.DEBUG, "swh.core.github.utils")

    session = GitHubSession(
        credentials=github_credentials, user_agent="GitHub Session Test"
    )

    api_url = f"{HTTP_GITHUB_API_URL}?per_page=1000&since=10"
    for _ in range(num_ratelimit):
        session.request(api_url)

    # We sleep 1 second every time we change credentials, then we sleep until
    # ratelimit_reset + 1
    expected_sleep_calls = [1] * len(github_credentials) + [ratelimit_reset + 1]
    assert monkeypatch_sleep_calls == expected_sleep_calls

    found_exhaustion_message = any(
        record.levelname == "INFO"
        and "Rate limits exhausted for all tokens" in record.message
        for record in caplog.records
    )
    assert found_exhaustion_message is True
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
import time
from swh.core.github.pytest_plugin import fake_time_sleep, fake_time_time
@pytest.mark.parametrize("duration", [10, 20, -1])
def test_fake_time_sleep(duration):
    """fake_time_sleep rejects negative durations and records the others.

    A negative duration must raise a ValueError whose message mentions
    "negative"; any other duration must end up in the list passed in.
    """
    if duration < 0:
        with pytest.raises(ValueError, match="negative"):
            fake_time_sleep(duration, [])
        return

    recorded = []
    fake_time_sleep(duration, recorded)
    assert duration in recorded
def test_fake_time_time():
    """The fake clock is expected to report 0."""
    current_time = fake_time_time()
    assert current_time == 0
def test_monkeypatch_sleep_calls(monkeypatch_sleep_calls):
    """Sleeps are recorded in the fixture list; reading the time is not."""
    sleeps = [10, 20, 30]
    expected_count = len(sleeps)

    for sleep in sleeps:
        # This adds the sleep number inside the monkeypatch_sleep_calls fixture
        time.sleep(sleep)
        assert sleep in monkeypatch_sleep_calls

    assert len(monkeypatch_sleep_calls) == expected_count

    # This mocks time but adds nothing to the same fixture
    time.time()
    assert len(monkeypatch_sleep_calls) == expected_count
def test_num_before_ratelimit(num_before_ratelimit):
    """The ``num_before_ratelimit`` fixture is expected to default to 0."""
    expected_default = 0
    assert num_before_ratelimit == expected_default
def test_ratelimit_reset(ratelimit_reset):
    """The ``ratelimit_reset`` fixture is expected to default to None."""
    assert ratelimit_reset is None
def test_num_ratelimit(num_ratelimit):
    """The ``num_ratelimit`` fixture is expected to default to None."""
    assert num_ratelimit is None
[tox]
envlist=black,flake8,mypy,py3-{core,db,server}
envlist=black,flake8,mypy,py3-{core,db,server,github}
[testenv]
passenv = PYTHONASYNCIODEBUG
......@@ -8,6 +8,7 @@ extras =
core: logging
db: db
server: http
github: github
deps =
cover: pytest-cov
commands =
......@@ -17,12 +18,13 @@ commands =
core: {envsitepackagesdir}/swh/core/tests \
db: {envsitepackagesdir}/swh/core/db/tests \
server: {envsitepackagesdir}/swh/core/api/tests \
github: {envsitepackagesdir}/swh/core/github/tests \
{posargs}
[testenv:py3]
skip_install = true
deps = tox
commands = tox -e py3-core-db-server-slow-cover -- {posargs}
commands = tox -e py3-core-db-server-github-slow-cover -- {posargs}
[testenv:black]
skip_install = true
......@@ -45,6 +47,7 @@ extras =
logging
db
http
github
deps =
mypy==0.942
commands =
......@@ -61,6 +64,7 @@ extras =
logging
db
http
github
deps =
# fetch and install swh-docs in develop mode
-e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs
......@@ -83,6 +87,7 @@ extras =
logging
db
http
github
deps =
# install swh-docs in develop mode
-e ../swh-docs
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment