Skip to content
Snippets Groups Projects
Verified Commit c16bcd57 authored by Antoine Lambert's avatar Antoine Lambert
Browse files

cli: Add 'sentry extract-scheduler-tasks' command

Usage: swh sentry extract-scheduler-tasks [OPTIONS]

  Extract scheduler task parameters from events.

  This command allows extracting scheduler task parameters from Sentry events
  related to a Software Heritage scheduler task and dumps a CSV file to stdout
  that can be consumed by the CLI command:

  $ swh scheduler task schedule --columns type --columns kwargs <csv_file>.

Options:
  -u, --sentry-url TEXT           Sentry URL  [default:
                                  https://sentry.softwareheritage.org]
  -t, --sentry-token TEXT         Bearer token required to communicate with
                                  Sentry API (can also be provided in
                                  SENTRY_TOKEN environment variable)
                                  [required]
  -i, --sentry-issue-number TEXT  Sentry issue number to extract origin URLs
                                  from its events  [required]
  -e, --environment TEXT          Filter on environment: production or
                                  staging, both are selected by default
  -h, --help                      Show this message and exit.
parent 965be8f9
No related branches found
No related tags found
No related merge requests found
Pipeline #12893 passed
......@@ -64,6 +64,7 @@ def _process_sentry_events_pages(
sentry_token,
sentry_issue_number,
events_page_process_callback: Callable[[List[Dict[str, Any]]], None],
full_sentry_responses: bool = False,
):
import requests
......@@ -71,6 +72,8 @@ def _process_sentry_events_pages(
sentry_issue_events_url = (
f"{sentry_api_base_url}/issues/{sentry_issue_number}/events/"
)
if full_sentry_responses:
sentry_issue_events_url += "?full=true"
while True:
response = requests.get(
sentry_issue_events_url, headers={"Authorization": f"Bearer {sentry_token}"}
......@@ -108,3 +111,44 @@ def extract_origin_urls(sentry_url, sentry_token, sentry_issue_number, environme
for origin_url in sorted(origin_urls):
click.echo(origin_url)
@sentry.command(name="extract-scheduler-tasks", context_settings=CONTEXT_SETTINGS)
@common_options
def extract_scheduler_tasks(sentry_url, sentry_token, sentry_issue_number, environment):
    """Extract scheduler task parameters from events.

    This command allows to extract scheduler task parameters from Sentry events related to
    a Software Heritage scheduler task and dumps a CSV file to stdout that can be consumed
    by the CLI command:

    $ swh scheduler task schedule --columns type --columns kwargs <csv_file>.
    """
    # NOTE: the docstring above is rendered by click as the command help, so
    # implementation notes live in comments rather than in the docstring.
    #
    # Parameters (all injected by ``common_options``):
    #   sentry_url: base URL of the Sentry instance to query.
    #   sentry_token: bearer token sent to the Sentry API.
    #   sentry_issue_number: identifier of the issue whose events are scanned.
    #   environment: when set, only events tagged with that environment are
    #     kept; when empty/None, events from every environment are kept.
    import csv
    import json
    import sys

    # Maps a canonical JSON rendering of (task name, kwargs) to the pair itself,
    # so a task reported by several events is written only once. A JSON string
    # is used as key because kwargs values may be unhashable (lists, dicts) and
    # because it also takes kwargs names into account, not only their values.
    task_params = {}

    def _extract_scheduler_tasks(events):
        """Collect the celery task name and kwargs of each event of a page."""
        for event in events:
            if environment:
                # Tags are a list of {"key": ..., "value": ...} objects in the
                # Sentry responses used by the tests of this command.
                tags = {
                    tag.get("key"): tag.get("value")
                    for tag in event.get("tags") or []
                }
                if tags.get("environment") != environment:
                    continue
            # ``or {}`` also covers keys that are present but set to null.
            celery_job = (event.get("context") or {}).get("celery-job") or {}
            task_name = celery_job.get("task_name")
            task_param = celery_job.get("kwargs")
            # A row without task type or without parameters cannot be scheduled.
            if not task_name or not task_param:
                continue
            key = json.dumps([task_name, task_param], sort_keys=True, default=str)
            task_params[key] = (task_name, task_param)

    # Full event payloads are requested as the celery job context is read above.
    _process_sentry_events_pages(
        sentry_url,
        sentry_token,
        sentry_issue_number,
        _extract_scheduler_tasks,
        full_sentry_responses=True,
    )

    csv_writer = csv.writer(sys.stdout)
    # Rows are ordered by the "url" task parameter when there is one; the
    # str() coercion keeps the sort from failing on a missing or non-string url.
    for task_type, task_param in sorted(
        task_params.values(), key=lambda p: str(p[1].get("url") or "")
    ):
        csv_writer.writerow([task_type, json.dumps(task_param)])
[
{
"tags": [
{
"key": "environment",
"value": "production"
},
{
"key": "swh.loader.origin_url",
"value": "opam+https://opam.ocaml.org/packages/cgi/"
}
],
"context": {
"celery-job": {
"args": [],
"kwargs": {
"lister_instance_name": "opam.ocaml.org",
"lister_name": "opam",
"opam_instance": "opam.ocaml.org",
"opam_package": "cgi",
"opam_root": "/tmp/opam/",
"opam_url": "https://opam.ocaml.org",
"url": "opam+https://opam.ocaml.org/packages/cgi/"
},
"task_name": "swh.loader.package.opam.tasks.LoadOpam"
}
}
},
{
"tags": [
{
"key": "environment",
"value": "production"
},
{
"key": "swh.loader.origin_url",
"value": "opam+https://opam.ocaml.org/packages/combine/"
}
],
"context": {
"celery-job": {
"args": [],
"kwargs": {
"lister_instance_name": "opam.ocaml.org",
"lister_name": "opam",
"opam_instance": "opam.ocaml.org",
"opam_package": "combine",
"opam_root": "/tmp/opam/",
"opam_url": "https://opam.ocaml.org",
"url": "opam+https://opam.ocaml.org/packages/combine/"
},
"task_name": "swh.loader.package.opam.tasks.LoadOpam"
}
}
}
]
\ No newline at end of file
api_0_issues_112726_events,full=true
\ No newline at end of file
[
{
"tags": [
{
"key": "environment",
"value": "production"
},
{
"key": "swh.loader.origin_url",
"value": "opam+https://opam.ocaml.org/packages/cgi/"
}
],
"context": {
"celery-job": {
"args": [],
"kwargs": {
"lister_instance_name": "opam.ocaml.org",
"lister_name": "opam",
"opam_instance": "opam.ocaml.org",
"opam_package": "cgi",
"opam_root": "/tmp/opam/",
"opam_url": "https://opam.ocaml.org",
"url": "opam+https://opam.ocaml.org/packages/cgi/"
},
"task_name": "swh.loader.package.opam.tasks.LoadOpam"
}
}
},
{
"tags": [
{
"key": "environment",
"value": "production"
},
{
"key": "swh.loader.origin_url",
"value": "opam+https://opam.ocaml.org/packages/combine/"
}
],
"context": {
"celery-job": {
"args": [],
"kwargs": {
"lister_instance_name": "opam.ocaml.org",
"lister_name": "opam",
"opam_instance": "opam.ocaml.org",
"opam_package": "combine",
"opam_root": "/tmp/opam/",
"opam_url": "https://opam.ocaml.org",
"url": "opam+https://opam.ocaml.org/packages/combine/"
},
"task_name": "swh.loader.package.opam.tasks.LoadOpam"
}
}
}
]
\ No newline at end of file
swh.loader.package.opam.tasks.LoadOpam,"{""lister_instance_name"": ""opam.ocaml.org"", ""lister_name"": ""opam"", ""opam_instance"": ""opam.ocaml.org"", ""opam_package"": ""bdd"", ""opam_root"": ""/tmp/opam/"", ""opam_url"": ""https://opam.ocaml.org"", ""url"": ""opam+https://opam.ocaml.org/packages/bdd/""}"
swh.loader.package.opam.tasks.LoadOpam,"{""lister_instance_name"": ""opam.ocaml.org"", ""lister_name"": ""opam"", ""opam_instance"": ""opam.ocaml.org"", ""opam_package"": ""bitv"", ""opam_root"": ""/tmp/opam/"", ""opam_url"": ""https://opam.ocaml.org"", ""url"": ""opam+https://opam.ocaml.org/packages/bitv/""}"
swh.loader.package.opam.tasks.LoadOpam,"{""lister_instance_name"": ""opam.ocaml.org"", ""lister_name"": ""opam"", ""opam_instance"": ""opam.ocaml.org"", ""opam_package"": ""cgi"", ""opam_root"": ""/tmp/opam/"", ""opam_url"": ""https://opam.ocaml.org"", ""url"": ""opam+https://opam.ocaml.org/packages/cgi/""}"
swh.loader.package.opam.tasks.LoadOpam,"{""lister_instance_name"": ""opam.ocaml.org"", ""lister_name"": ""opam"", ""opam_instance"": ""opam.ocaml.org"", ""opam_package"": ""combine"", ""opam_root"": ""/tmp/opam/"", ""opam_url"": ""https://opam.ocaml.org"", ""url"": ""opam+https://opam.ocaml.org/packages/combine/""}"
\ No newline at end of file
......@@ -3,6 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from click.testing import CliRunner
import pytest
......@@ -14,7 +15,7 @@ from swh.core.tests.test_cli import assert_result
def response_context_callback(request, context):
"""Add link headers to mocked Sentry REST API responses"""
base_url = f"{request.scheme}://{request.netloc}{request.path}"
if not request.query:
if "cursor" not in request.query:
context.headers["Link"] = f'<{base_url}?cursor=0:100:0>; rel="next"'
else:
context.headers["Link"] = f'<{base_url}?cursor=0:200:0>; rel="next"'
......@@ -24,6 +25,8 @@ requests_mock_sentry = requests_mock_datadir_factory(
response_context_callback=response_context_callback
)
SENTRY_ISSUE_ID = "112726"
@pytest.fixture
def swhmain(swhmain):
......@@ -36,7 +39,8 @@ def swhmain(swhmain):
def test_sentry_extract_origin_urls(swhmain, requests_mock_sentry):
runner = CliRunner()
result = runner.invoke(
swhmain, ["sentry", "extract-origin-urls", "-t", "sentry-token", "-i", "112726"]
swhmain,
["sentry", "extract-origin-urls", "-t", "sentry-token", "-i", SENTRY_ISSUE_ID],
)
assert_result(result)
expected_output = """
......@@ -46,3 +50,22 @@ opam+https://opam.ocaml.org/packages/cgi/
opam+https://opam.ocaml.org/packages/combine/
"""
assert result.output.strip() == expected_output.strip("\n")
def test_sentry_extract_scheduler_tasks(swhmain, requests_mock_sentry, datadir):
    """Check the CSV dumped by 'swh sentry extract-scheduler-tasks'.

    The command is run against the mocked Sentry API and its output is
    compared with the reference CSV file stored in the test data directory.
    """
    cli_args = [
        "sentry",
        "extract-scheduler-tasks",
        "-t",
        "sentry-token",
        "-i",
        SENTRY_ISSUE_ID,
    ]
    result = CliRunner().invoke(swhmain, cli_args)
    assert_result(result)

    expected_csv_path = os.path.join(datadir, "sentry_expected_scheduler_tasks.csv")
    with open(expected_csv_path, "r") as expected_csv:
        expected_output = expected_csv.read()
    assert result.output.strip() == expected_output
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment