From c16bcd571d382d9fbcce9e8a53d7ac419eeb0041 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Fri, 24 Jan 2025 15:57:14 +0100
Subject: [PATCH] cli: Add 'sentry extract-scheduler-tasks' command

Usage: swh sentry extract-scheduler-tasks [OPTIONS]

  Extract scheduler task parameters from events.

  This command allows to extract scheduler task parameters from Sentry events
  related to a Software Heritage scheduler task and dumps a CSV file to stdout
  that can be consumed by the CLI command:

  $ swh scheduler task schedule --columns type --columns kwargs <csv_file>.

Options:
  -u, --sentry-url TEXT           Sentry URL [default:
                                  https://sentry.softwareheritage.org]
  -t, --sentry-token TEXT         Bearer token required to communicate with
                                  Sentry API (can also be provided in
                                  SENTRY_TOKEN environment variable)
                                  [required]
  -i, --sentry-issue-number TEXT  Sentry issue number to extract origin URLs
                                  from its events [required]
  -e, --environment TEXT          Filter on environment: production or
                                  staging, both are selected by default
  -h, --help                      Show this message and exit.
--- swh/core/cli/sentry.py | 44 ++++++++++++++ .../api_0_issues_112726_events | 57 +------------------ .../api_0_issues_112726_events,full=true | 56 ++++++++++++++++++ .../data/sentry_expected_scheduler_tasks.csv | 4 ++ swh/core/tests/test_cli_sentry.py | 27 ++++++++- 5 files changed, 130 insertions(+), 58 deletions(-) mode change 100644 => 120000 swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events create mode 100644 swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,full=true create mode 100644 swh/core/tests/data/sentry_expected_scheduler_tasks.csv diff --git a/swh/core/cli/sentry.py b/swh/core/cli/sentry.py index 41b15d7..290853a 100644 --- a/swh/core/cli/sentry.py +++ b/swh/core/cli/sentry.py @@ -64,6 +64,7 @@ def _process_sentry_events_pages( sentry_token, sentry_issue_number, events_page_process_callback: Callable[[List[Dict[str, Any]]], None], + full_sentry_responses: bool = False, ): import requests @@ -71,6 +72,8 @@ def _process_sentry_events_pages( sentry_issue_events_url = ( f"{sentry_api_base_url}/issues/{sentry_issue_number}/events/" ) + if full_sentry_responses: + sentry_issue_events_url += "?full=true" while True: response = requests.get( sentry_issue_events_url, headers={"Authorization": f"Bearer {sentry_token}"} @@ -108,3 +111,44 @@ def extract_origin_urls(sentry_url, sentry_token, sentry_issue_number, environme for origin_url in sorted(origin_urls): click.echo(origin_url) + + +@sentry.command(name="extract-scheduler-tasks", context_settings=CONTEXT_SETTINGS) +@common_options +def extract_scheduler_tasks(sentry_url, sentry_token, sentry_issue_number, environment): + """Extract scheduler task parameters from events. + + This command allows to extract scheduler task parameters from Sentry events related to + a Software Heritage scheduler task and dumps a CSV file to stdout that can be consumed + by the CLI command: + + $ swh scheduler task schedule --columns type --columns kwargs <csv_file>. 
+ """ + import csv + import json + import sys + + task_params = {} + + def _extract_scheduler_tasks(events): + for event in events: + celery_job = event.get("context", {}).get("celery-job", {}) + task_name = celery_job.get("task_name") + task_param = celery_job.get("kwargs") + if task_param: + key = tuple([task_name] + list(task_param.values())) + task_params[key] = (task_name, task_param) + + _process_sentry_events_pages( + sentry_url, + sentry_token, + sentry_issue_number, + _extract_scheduler_tasks, + full_sentry_responses=True, + ) + + csv_writer = csv.writer(sys.stdout) + for task_type, task_param in sorted( + task_params.values(), key=lambda p: p[1].get("url", "") + ): + csv_writer.writerow([task_type, json.dumps(task_param)]) diff --git a/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events deleted file mode 100644 index c160a87..0000000 --- a/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events +++ /dev/null @@ -1,56 +0,0 @@ -[ - { - "tags": [ - { - "key": "environment", - "value": "production" - }, - { - "key": "swh.loader.origin_url", - "value": "opam+https://opam.ocaml.org/packages/cgi/" - } - ], - "context": { - "celery-job": { - "args": [], - "kwargs": { - "lister_instance_name": "opam.ocaml.org", - "lister_name": "opam", - "opam_instance": "opam.ocaml.org", - "opam_package": "cgi", - "opam_root": "/tmp/opam/", - "opam_url": "https://opam.ocaml.org", - "url": "opam+https://opam.ocaml.org/packages/cgi/" - }, - "task_name": "swh.loader.package.opam.tasks.LoadOpam" - } - } - }, - { - "tags": [ - { - "key": "environment", - "value": "production" - }, - { - "key": "swh.loader.origin_url", - "value": "opam+https://opam.ocaml.org/packages/combine/" - } - ], - "context": { - "celery-job": { - "args": [], - "kwargs": { - "lister_instance_name": "opam.ocaml.org", - "lister_name": "opam", - "opam_instance": "opam.ocaml.org", - 
"opam_package": "combine", - "opam_root": "/tmp/opam/", - "opam_url": "https://opam.ocaml.org", - "url": "opam+https://opam.ocaml.org/packages/combine/" - }, - "task_name": "swh.loader.package.opam.tasks.LoadOpam" - } - } - } -] \ No newline at end of file diff --git a/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events new file mode 120000 index 0000000..abf5175 --- /dev/null +++ b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events @@ -0,0 +1 @@ +api_0_issues_112726_events,full=true \ No newline at end of file diff --git a/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,full=true b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,full=true new file mode 100644 index 0000000..c160a87 --- /dev/null +++ b/swh/core/tests/data/https_sentry.softwareheritage.org/api_0_issues_112726_events,full=true @@ -0,0 +1,56 @@ +[ + { + "tags": [ + { + "key": "environment", + "value": "production" + }, + { + "key": "swh.loader.origin_url", + "value": "opam+https://opam.ocaml.org/packages/cgi/" + } + ], + "context": { + "celery-job": { + "args": [], + "kwargs": { + "lister_instance_name": "opam.ocaml.org", + "lister_name": "opam", + "opam_instance": "opam.ocaml.org", + "opam_package": "cgi", + "opam_root": "/tmp/opam/", + "opam_url": "https://opam.ocaml.org", + "url": "opam+https://opam.ocaml.org/packages/cgi/" + }, + "task_name": "swh.loader.package.opam.tasks.LoadOpam" + } + } + }, + { + "tags": [ + { + "key": "environment", + "value": "production" + }, + { + "key": "swh.loader.origin_url", + "value": "opam+https://opam.ocaml.org/packages/combine/" + } + ], + "context": { + "celery-job": { + "args": [], + "kwargs": { + "lister_instance_name": "opam.ocaml.org", + "lister_name": "opam", + "opam_instance": "opam.ocaml.org", + "opam_package": "combine", + "opam_root": "/tmp/opam/", + 
"opam_url": "https://opam.ocaml.org", + "url": "opam+https://opam.ocaml.org/packages/combine/" + }, + "task_name": "swh.loader.package.opam.tasks.LoadOpam" + } + } + } +] \ No newline at end of file diff --git a/swh/core/tests/data/sentry_expected_scheduler_tasks.csv b/swh/core/tests/data/sentry_expected_scheduler_tasks.csv new file mode 100644 index 0000000..442ff82 --- /dev/null +++ b/swh/core/tests/data/sentry_expected_scheduler_tasks.csv @@ -0,0 +1,4 @@ +swh.loader.package.opam.tasks.LoadOpam,"{""lister_instance_name"": ""opam.ocaml.org"", ""lister_name"": ""opam"", ""opam_instance"": ""opam.ocaml.org"", ""opam_package"": ""bdd"", ""opam_root"": ""/tmp/opam/"", ""opam_url"": ""https://opam.ocaml.org"", ""url"": ""opam+https://opam.ocaml.org/packages/bdd/""}" +swh.loader.package.opam.tasks.LoadOpam,"{""lister_instance_name"": ""opam.ocaml.org"", ""lister_name"": ""opam"", ""opam_instance"": ""opam.ocaml.org"", ""opam_package"": ""bitv"", ""opam_root"": ""/tmp/opam/"", ""opam_url"": ""https://opam.ocaml.org"", ""url"": ""opam+https://opam.ocaml.org/packages/bitv/""}" +swh.loader.package.opam.tasks.LoadOpam,"{""lister_instance_name"": ""opam.ocaml.org"", ""lister_name"": ""opam"", ""opam_instance"": ""opam.ocaml.org"", ""opam_package"": ""cgi"", ""opam_root"": ""/tmp/opam/"", ""opam_url"": ""https://opam.ocaml.org"", ""url"": ""opam+https://opam.ocaml.org/packages/cgi/""}" +swh.loader.package.opam.tasks.LoadOpam,"{""lister_instance_name"": ""opam.ocaml.org"", ""lister_name"": ""opam"", ""opam_instance"": ""opam.ocaml.org"", ""opam_package"": ""combine"", ""opam_root"": ""/tmp/opam/"", ""opam_url"": ""https://opam.ocaml.org"", ""url"": ""opam+https://opam.ocaml.org/packages/combine/""}" \ No newline at end of file diff --git a/swh/core/tests/test_cli_sentry.py b/swh/core/tests/test_cli_sentry.py index c19953e..b837b24 100644 --- a/swh/core/tests/test_cli_sentry.py +++ b/swh/core/tests/test_cli_sentry.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 
3, or any later version # See top-level LICENSE file for more information +import os from click.testing import CliRunner import pytest @@ -14,7 +15,7 @@ from swh.core.tests.test_cli import assert_result def response_context_callback(request, context): """Add link headers to mocked Sentry REST API responses""" base_url = f"{request.scheme}://{request.netloc}{request.path}" - if not request.query: + if "cursor" not in request.query: context.headers["Link"] = f'<{base_url}?cursor=0:100:0>; rel="next"' else: context.headers["Link"] = f'<{base_url}?cursor=0:200:0>; rel="next"' @@ -24,6 +25,8 @@ requests_mock_sentry = requests_mock_datadir_factory( response_context_callback=response_context_callback ) +SENTRY_ISSUE_ID = "112726" + @pytest.fixture def swhmain(swhmain): @@ -36,7 +39,8 @@ def swhmain(swhmain): def test_sentry_extract_origin_urls(swhmain, requests_mock_sentry): runner = CliRunner() result = runner.invoke( - swhmain, ["sentry", "extract-origin-urls", "-t", "sentry-token", "-i", "112726"] + swhmain, + ["sentry", "extract-origin-urls", "-t", "sentry-token", "-i", SENTRY_ISSUE_ID], ) assert_result(result) expected_output = """ @@ -46,3 +50,22 @@ opam+https://opam.ocaml.org/packages/cgi/ opam+https://opam.ocaml.org/packages/combine/ """ assert result.output.strip() == expected_output.strip("\n") + + +def test_sentry_extract_scheduler_tasks(swhmain, requests_mock_sentry, datadir): + runner = CliRunner() + result = runner.invoke( + swhmain, + [ + "sentry", + "extract-scheduler-tasks", + "-t", + "sentry-token", + "-i", + SENTRY_ISSUE_ID, + ], + ) + assert_result(result) + csv_tasks_file = os.path.join(datadir, "sentry_expected_scheduler_tasks.csv") + with open(csv_tasks_file, "r") as tasks_csv: + assert result.output.strip() == tasks_csv.read() -- GitLab