From 63fdda00f5f923294ebae3565c26d1741a001cab Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <ardumont@softwareheritage.org> Date: Thu, 3 Jun 2021 16:03:26 +0200 Subject: [PATCH] send-to-celery: Add more options to allow scheduling of edge cases In the non optimal case, we may want to trigger specific case (not-yet enabled origins, origin from specific lister...). Related to T3350 --- swh/scheduler/backend.py | 8 +++++++- swh/scheduler/cli/origin.py | 29 +++++++++++++++++++++++++++-- swh/scheduler/interface.py | 7 +++++++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py index e4224484..19b48a01 100644 --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -342,6 +342,8 @@ class SchedulerBackend: visit_type: str, count: int, policy: str, + enabled: bool = True, + lister_uuid: Optional[str] = None, timestamp: Optional[datetime.datetime] = None, scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), @@ -363,7 +365,7 @@ class SchedulerBackend: common_table_expressions: List[Tuple[str, str]] = [] # "NOT enabled" = the lister said the origin no longer exists - where_clauses.append("enabled") + where_clauses.append("enabled" if enabled else "not enabled") # Only schedule visits of the given type where_clauses.append("visit_type = %s") @@ -466,6 +468,10 @@ class SchedulerBackend: else: table = "listed_origins" + if lister_uuid: + where_clauses.append("lister_id = %s") + query_args.append(lister_uuid) + # fmt: off common_table_expressions.insert(0, ("selected_origins", f""" SELECT diff --git a/swh/scheduler/cli/origin.py b/swh/scheduler/cli/origin.py index cb926d72..5f5d5d6c 100644 --- a/swh/scheduler/cli/origin.py +++ b/swh/scheduler/cli/origin.py @@ -155,10 +155,30 @@ def schedule_next(ctx, policy: str, type: str, count: int): @click.option( "--tablesample", help="Table sampling 
percentage", type=float, ) +@click.option( + "--only-enabled/--only-disabled", + "enabled", + is_flag=True, + default=True, + help="""Determine whether we want to scheduled enabled or disabled origins. As default, we + want to reasonably deal with enabled origins. For some edge case though, we + might want the disabled ones.""", +) +@click.option( + "--lister-uuid", + default=None, + help="Limit origins to those listed from such lister", +) @click.argument("type", type=str) @click.pass_context def send_to_celery( - ctx, policy: str, queue: Optional[str], tablesample: Optional[float], type: str + ctx, + policy: str, + queue: Optional[str], + tablesample: Optional[float], + type: str, + enabled: bool, + lister_uuid: Optional[str] = None, ): """Send the next origin visits of the TYPE loader to celery, filling the queue.""" from kombu.utils.uuid import uuid @@ -176,7 +196,12 @@ def send_to_celery( click.echo(f"{num_tasks} slots available in celery queue") origins = scheduler.grab_next_visits( - type, num_tasks, policy=policy, tablesample=tablesample + type, + num_tasks, + policy=policy, + tablesample=tablesample, + enabled=enabled, + lister_uuid=lister_uuid, ) click.echo(f"{len(origins)} visits to send to celery") diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py index 2e93250f..811009f1 100644 --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -394,6 +394,8 @@ class SchedulerInterface(Protocol): visit_type: str, count: int, policy: str, + enabled: bool = True, + lister_uuid: Optional[str] = None, timestamp: Optional[datetime.datetime] = None, scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7), failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14), @@ -410,6 +412,10 @@ class SchedulerInterface(Protocol): visit_type: type of visits to schedule count: number of visits to schedule policy: the scheduling policy used to select which visits to schedule + enabled: Determine whether we want 
to list enabled or disabled origins. By + default, only enabled origins are selected. For some edge cases, we might + want the disabled ones instead. + lister_uuid: only select origins listed by the lister with this uuid timestamp: the mocked timestamp at which we're recording that the visits are being scheduled (defaults to the current time) scheduled_cooldown: the minimal interval before which we can schedule @@ -420,6 +426,7 @@ class SchedulerInterface(Protocol): not_found origin tablesample: the percentage of the table on which we run the query (None: no sampling) + """ ... -- GitLab