From 965be8f9ee16d5cd9edbaea58f0e2006553bf87d Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Fri, 24 Jan 2025 11:50:24 +0100 Subject: [PATCH] cli: Add 'sentry extract-origin-urls' command Usage: swh sentry extract-origin-urls [OPTIONS] Extract origin URLs from events. This command allows to extract origin URLs from Sentry events related to a Software Heritage loader and dumps them to stdout. Options: -u, --sentry-url TEXT Sentry URL [default: https://sentry.softwareheritage.org] -t, --sentry-token TEXT Bearer token required to communicate with Sentry API (can also be provided in SENTRY_TOKEN environment variable) [required] -i, --sentry-issue-number TEXT Sentry issue number to extract origin URLs from its events [required] -e, --environment TEXT Filter on environment: production or staging, both are selected by default -h, --help Show this message and exit. --- pyproject.toml | 1 + swh/core/cli/sentry.py | 110 ++++++++++++++++++ swh/core/pytest_plugin.py | 31 +++-- .../api_0_issues_112726_events | 56 +++++++++ .../api_0_issues_112726_events,cursor=0:100:0 | 56 +++++++++ .../api_0_issues_112726_events,cursor=0:200:0 | 1 + swh/core/tests/test_cli_sentry.py | 48 ++++++++ 7 files changed, 295 insertions(+), 8 deletions(-) create mode 100644 swh/core/cli/sentry.py create mode 100644 swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events create mode 100644 swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,cursor=0:100:0 create mode 100644 swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,cursor=0:200:0 create mode 100644 swh/core/tests/test_cli_sentry.py diff --git a/pyproject.toml b/pyproject.toml index cb13689..f81566c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ testing = {file = [ [project.entry-points."swh.cli.subcommands"] "swh.core.db" = "swh.core.cli.db" "swh.core.backend" = "swh.core.cli.backend" +"swh.core.sentry" 
= "swh.core.cli.sentry"
 
 [project.entry-points.pytest11]
 "pytest_swh_core" = "swh.core.pytest_plugin"
diff --git a/swh/core/cli/sentry.py b/swh/core/cli/sentry.py
new file mode 100644
index 0000000..41b15d7
--- /dev/null
+++ b/swh/core/cli/sentry.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+# Copyright (C) 2025 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Any, Callable, Dict, List
+
+import click
+
+from swh.core.cli import CONTEXT_SETTINGS
+from swh.core.cli import swh as swh_cli_group
+
+
+def common_options(func):
+    """Add the Sentry connection and filtering options to a click command.
+
+    The decorated command receives the ``sentry_url``, ``sentry_token``,
+    ``sentry_issue_number`` and ``environment`` keyword arguments.
+    """
+    import functools
+
+    @click.option(
+        "--sentry-url",
+        "-u",
+        default="https://sentry.softwareheritage.org",
+        show_default=True,
+        help="Sentry URL",
+    )
+    @click.option(
+        "--sentry-token",
+        "-t",
+        default=None,
+        envvar="SENTRY_TOKEN",
+        help=(
+            "Bearer token required to communicate with Sentry API (can also be provided "
+            "in SENTRY_TOKEN environment variable)"
+        ),
+        required=True,
+    )
+    @click.option(
+        "--sentry-issue-number",
+        "-i",
+        help="Sentry issue number to extract origin URLs from its events",
+        required=True,
+    )
+    @click.option(
+        "--environment",
+        "-e",
+        default="",
+        help="Filter on environment: production or staging, both are selected by default",
+    )
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        return func(*args, **kwargs)
+
+    return wrapper
+
+
+@swh_cli_group.group(name="sentry", context_settings=CONTEXT_SETTINGS)
+def sentry():
+    """Software Heritage tools for extracting data from the events associated to
+    a Sentry issue using Sentry REST API."""
+    pass
+
+
+def _process_sentry_events_pages(
+    sentry_url,
+    sentry_token,
+    sentry_issue_number,
+    events_page_process_callback: Callable[[List[Dict[str, Any]]], None],
+):
+    """Iterate over the pages of events of a Sentry issue.
+
+    Args:
+        sentry_url: base URL of the Sentry instance
+        sentry_token: bearer token used to authenticate to the Sentry REST API
+        sentry_issue_number: identifier of the issue to fetch the events from
+        events_page_process_callback: function called with the list of events
+            of each non-empty page
+
+    Raises:
+        requests.HTTPError: if the Sentry REST API returns an error response
+    """
+    import requests
+
+    sentry_api_base_url = f"{sentry_url.rstrip('/')}/api/0"
+    sentry_issue_events_url = (
+        f"{sentry_api_base_url}/issues/{sentry_issue_number}/events/"
+    )
+    # stop when the API no longer advertises a next page instead of requesting
+    # a None URL
+    while sentry_issue_events_url:
+        response = requests.get(
+            sentry_issue_events_url, headers={"Authorization": f"Bearer {sentry_token}"}
+        )
+        # fail early on invalid token or unknown issue rather than feeding the
+        # JSON error payload to the callback
+        response.raise_for_status()
+        events = response.json()
+        if not events:
+            break
+        events_page_process_callback(events)
+        next_link = response.links.get("next", {})
+        if next_link.get("results") == "false":
+            # Sentry flags the last page with results="false" in the Link header
+            break
+        sentry_issue_events_url = next_link.get("url")
+
+
+@sentry.command(name="extract-origin-urls", context_settings=CONTEXT_SETTINGS)
+@common_options
+def extract_origin_urls(sentry_url, sentry_token, sentry_issue_number, environment):
+    """Extract origin URLs from events.
+
+    This command allows to extract origin URLs from Sentry events related to
+    a Software Heritage loader and dumps them to stdout."""
+
+    origin_urls = set()
+
+    def _extract_origin_urls(events: List[Dict[str, Any]]):
+        for event in events:
+            tags = {tag["key"]: tag["value"] for tag in event.get("tags", [])}
+            # substring match: the default empty filter selects every environment
+            env_match = environment in tags.get("environment", "")
+            if "swh.loader.origin_url" in tags and env_match:
+                origin_urls.add(tags["swh.loader.origin_url"])
+
+    _process_sentry_events_pages(
+        sentry_url,
+        sentry_token,
+        sentry_issue_number,
+        _extract_origin_urls,
+    )
+
+    for origin_url in sorted(origin_urls):
+        click.echo(origin_url)
diff --git a/swh/core/pytest_plugin.py b/swh/core/pytest_plugin.py
index a3d8332..0f431c1 100644
--- a/swh/core/pytest_plugin.py
+++ b/swh/core/pytest_plugin.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2024 The Software Heritage developers
+# Copyright (C) 2019-2025 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -10,7 +10,7 @@ import logging
 from os import path
 from pathlib import Path
 import re
-from typing import Dict, List, Optional
+from typing import Callable, Dict, List, Optional
 from urllib.parse import unquote, urlparse
 
 import pytest
@@ -29,11 +29,12
@@ MAX_VISIT_FILES = 10 def get_response_cb( - request: requests.Request, + request, context, datadir, ignore_urls: List[str] = [], visits: Optional[Dict] = None, + response_context_callback: Optional[Callable[[object, object], None]] = None, ): """Mount point callback to fetch on disk the request's content. The request urls provided are url decoded first to resolve the associated file on disk. @@ -72,14 +73,16 @@ def get_response_cb( datadir/http_nowhere.com/path_to_resource,a=b,c=d Args: - request: Object requests - context (requests.Context): Object holding response metadata - information (status_code, headers, etc...) + request: input HTTP request + context: Object holding response metadata information (status_code, headers, etc...) datadir: Data files path ignore_urls: urls whose status response should be 404 even if the local file exists visits: Dict of url, number of visits. If None, disable multi visit support (default) + response_context_callback: Optional callback function taking request and + response context object (having headers, status_code, reason and cookies + as attributes) as parameters for extra processing Returns: Optional[FileDescriptor] on disk file to read from the test context @@ -117,6 +120,8 @@ def get_response_cb( return None fd = open(filepath, "rb") context.headers["content-length"] = str(path.getsize(filepath)) + if response_context_callback: + response_context_callback(request, context) return fd @@ -143,7 +148,9 @@ def datadir(request: pytest.FixtureRequest) -> str: def requests_mock_datadir_factory( - ignore_urls: List[str] = [], has_multi_visit: bool = False + ignore_urls: List[str] = [], + has_multi_visit: bool = False, + response_context_callback: Optional[Callable[[object, object], None]] = None, ): """This factory generates fixtures which allow to look for files on the local filesystem based on the requested URL, using the following rules: @@ -176,13 +183,21 @@ def requests_mock_datadir_factory( ignore_urls: List of urls to 
always returns 404 (whether file exists or not) has_multi_visit: Activate or not the multiple visits behavior + response_context_callback: Optional callback function taking request and + response context object (having headers, status_code, reason and cookies + as attributes) as parameters for extra processing """ @pytest.fixture def requests_mock_datadir(requests_mock, datadir): if not has_multi_visit: - cb = partial(get_response_cb, ignore_urls=ignore_urls, datadir=datadir) + cb = partial( + get_response_cb, + ignore_urls=ignore_urls, + datadir=datadir, + response_context_callback=response_context_callback, + ) requests_mock.get(re.compile("https?://"), body=cb) else: visits = {} diff --git a/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events new file mode 100644 index 0000000..c160a87 --- /dev/null +++ b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events @@ -0,0 +1,56 @@ +[ + { + "tags": [ + { + "key": "environment", + "value": "production" + }, + { + "key": "swh.loader.origin_url", + "value": "opam+https://opam.ocaml.org/packages/cgi/" + } + ], + "context": { + "celery-job": { + "args": [], + "kwargs": { + "lister_instance_name": "opam.ocaml.org", + "lister_name": "opam", + "opam_instance": "opam.ocaml.org", + "opam_package": "cgi", + "opam_root": "/tmp/opam/", + "opam_url": "https://opam.ocaml.org", + "url": "opam+https://opam.ocaml.org/packages/cgi/" + }, + "task_name": "swh.loader.package.opam.tasks.LoadOpam" + } + } + }, + { + "tags": [ + { + "key": "environment", + "value": "production" + }, + { + "key": "swh.loader.origin_url", + "value": "opam+https://opam.ocaml.org/packages/combine/" + } + ], + "context": { + "celery-job": { + "args": [], + "kwargs": { + "lister_instance_name": "opam.ocaml.org", + "lister_name": "opam", + "opam_instance": "opam.ocaml.org", + "opam_package": "combine", + "opam_root": "/tmp/opam/", 
+ "opam_url": "https://opam.ocaml.org", + "url": "opam+https://opam.ocaml.org/packages/combine/" + }, + "task_name": "swh.loader.package.opam.tasks.LoadOpam" + } + } + } +] \ No newline at end of file diff --git a/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,cursor=0:100:0 b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,cursor=0:100:0 new file mode 100644 index 0000000..a24042e --- /dev/null +++ b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,cursor=0:100:0 @@ -0,0 +1,56 @@ +[ + { + "tags": [ + { + "key": "environment", + "value": "production" + }, + { + "key": "swh.loader.origin_url", + "value": "opam+https://opam.ocaml.org/packages/bitv/" + } + ], + "context": { + "celery-job": { + "args": [], + "kwargs": { + "lister_instance_name": "opam.ocaml.org", + "lister_name": "opam", + "opam_instance": "opam.ocaml.org", + "opam_package": "bitv", + "opam_root": "/tmp/opam/", + "opam_url": "https://opam.ocaml.org", + "url": "opam+https://opam.ocaml.org/packages/bitv/" + }, + "task_name": "swh.loader.package.opam.tasks.LoadOpam" + } + } + }, + { + "tags": [ + { + "key": "environment", + "value": "production" + }, + { + "key": "swh.loader.origin_url", + "value": "opam+https://opam.ocaml.org/packages/bdd/" + } + ], + "context": { + "celery-job": { + "args": [], + "kwargs": { + "lister_instance_name": "opam.ocaml.org", + "lister_name": "opam", + "opam_instance": "opam.ocaml.org", + "opam_package": "bdd", + "opam_root": "/tmp/opam/", + "opam_url": "https://opam.ocaml.org", + "url": "opam+https://opam.ocaml.org/packages/bdd/" + }, + "task_name": "swh.loader.package.opam.tasks.LoadOpam" + } + } + } +] \ No newline at end of file diff --git a/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,cursor=0:200:0 b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,cursor=0:200:0 new file mode 100644 index 0000000..0637a08 
--- /dev/null
+++ b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,cursor=0:200:0
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/swh/core/tests/test_cli_sentry.py b/swh/core/tests/test_cli_sentry.py
new file mode 100644
index 0000000..c19953e
--- /dev/null
+++ b/swh/core/tests/test_cli_sentry.py
@@ -0,0 +1,54 @@
+# Copyright (C) 2025 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from click.testing import CliRunner
+import pytest
+
+from swh.core.pytest_plugin import requests_mock_datadir_factory
+from swh.core.tests.test_cli import assert_result
+
+
+def response_context_callback(request, context):
+    """Add link headers to mocked Sentry REST API responses
+
+    The first page (request without query string) links to the page with cursor
+    ``0:100:0``, any other page links to the page with cursor ``0:200:0``.
+    """
+    base_url = f"{request.scheme}://{request.netloc}{request.path}"
+    if not request.query:
+        context.headers["Link"] = f'<{base_url}?cursor=0:100:0>; rel="next"'
+    else:
+        context.headers["Link"] = f'<{base_url}?cursor=0:200:0>; rel="next"'
+
+
+requests_mock_sentry = requests_mock_datadir_factory(
+    response_context_callback=response_context_callback
+)
+
+
+@pytest.fixture
+def swhmain(swhmain):
+    """Override the ``swhmain`` fixture to add the sentry command group to it."""
+    from swh.core.cli.sentry import sentry as swhsentry
+
+    swhmain.add_command(swhsentry)
+    return swhmain
+
+
+def test_sentry_extract_origin_urls(swhmain, requests_mock_sentry):
+    """Check origin URLs from all events pages are printed sorted to stdout."""
+    runner = CliRunner()
+    result = runner.invoke(
+        swhmain, ["sentry", "extract-origin-urls", "-t", "sentry-token", "-i", "112726"]
+    )
+    assert_result(result)
+    expected_output = """
+opam+https://opam.ocaml.org/packages/bdd/
+opam+https://opam.ocaml.org/packages/bitv/
+opam+https://opam.ocaml.org/packages/cgi/
+opam+https://opam.ocaml.org/packages/combine/
+"""
+    assert result.output.strip() == expected_output.strip("\n")
-- 
GitLab