From 63fdda00f5f923294ebae3565c26d1741a001cab Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <ardumont@softwareheritage.org>
Date: Thu, 3 Jun 2021 16:03:26 +0200
Subject: [PATCH] send-to-celery: Add more options to allow scheduling of edge
 cases

In non-optimal cases, we may want to trigger scheduling for specific cases (not-yet
enabled origins, origins from a specific lister...).

Related to T3350
---
 swh/scheduler/backend.py    |  8 +++++++-
 swh/scheduler/cli/origin.py | 29 +++++++++++++++++++++++++++--
 swh/scheduler/interface.py  |  7 +++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py
index e4224484..19b48a01 100644
--- a/swh/scheduler/backend.py
+++ b/swh/scheduler/backend.py
@@ -342,6 +342,8 @@ class SchedulerBackend:
         visit_type: str,
         count: int,
         policy: str,
+        enabled: bool = True,
+        lister_uuid: Optional[str] = None,
         timestamp: Optional[datetime.datetime] = None,
         scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7),
         failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14),
@@ -363,7 +365,7 @@ class SchedulerBackend:
         common_table_expressions: List[Tuple[str, str]] = []
 
         # "NOT enabled" = the lister said the origin no longer exists
-        where_clauses.append("enabled")
+        where_clauses.append("enabled" if enabled else "not enabled")
 
         # Only schedule visits of the given type
         where_clauses.append("visit_type = %s")
@@ -466,6 +468,10 @@ class SchedulerBackend:
         else:
             table = "listed_origins"
 
+        if lister_uuid:
+            where_clauses.append("lister_id = %s")
+            query_args.append(lister_uuid)
+
         # fmt: off
         common_table_expressions.insert(0, ("selected_origins", f"""
             SELECT
diff --git a/swh/scheduler/cli/origin.py b/swh/scheduler/cli/origin.py
index cb926d72..5f5d5d6c 100644
--- a/swh/scheduler/cli/origin.py
+++ b/swh/scheduler/cli/origin.py
@@ -155,10 +155,30 @@ def schedule_next(ctx, policy: str, type: str, count: int):
 @click.option(
     "--tablesample", help="Table sampling percentage", type=float,
 )
+@click.option(
+    "--only-enabled/--only-disabled",
+    "enabled",
+    is_flag=True,
+    default=True,
+    help="""Determine whether we want to scheduled enabled or disabled origins. As default, we
+            want to reasonably deal with enabled origins. For some edge case though, we
+            might want the disabled ones.""",
+)
+@click.option(
+    "--lister-uuid",
+    default=None,
+    help="Limit origins to those listed from such lister",
+)
 @click.argument("type", type=str)
 @click.pass_context
 def send_to_celery(
-    ctx, policy: str, queue: Optional[str], tablesample: Optional[float], type: str
+    ctx,
+    policy: str,
+    queue: Optional[str],
+    tablesample: Optional[float],
+    type: str,
+    enabled: bool,
+    lister_uuid: Optional[str] = None,
 ):
     """Send the next origin visits of the TYPE loader to celery, filling the queue."""
     from kombu.utils.uuid import uuid
@@ -176,7 +196,12 @@ def send_to_celery(
 
     click.echo(f"{num_tasks} slots available in celery queue")
     origins = scheduler.grab_next_visits(
-        type, num_tasks, policy=policy, tablesample=tablesample
+        type,
+        num_tasks,
+        policy=policy,
+        tablesample=tablesample,
+        enabled=enabled,
+        lister_uuid=lister_uuid,
     )
 
     click.echo(f"{len(origins)} visits to send to celery")
diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py
index 2e93250f..811009f1 100644
--- a/swh/scheduler/interface.py
+++ b/swh/scheduler/interface.py
@@ -394,6 +394,8 @@ class SchedulerInterface(Protocol):
         visit_type: str,
         count: int,
         policy: str,
+        enabled: bool = True,
+        lister_uuid: Optional[str] = None,
         timestamp: Optional[datetime.datetime] = None,
         scheduled_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=7),
         failed_cooldown: Optional[datetime.timedelta] = datetime.timedelta(days=14),
@@ -410,6 +412,10 @@ class SchedulerInterface(Protocol):
           visit_type: type of visits to schedule
           count: number of visits to schedule
           policy: the scheduling policy used to select which visits to schedule
+          enabled: Whether to select enabled or disabled origins. By default, only
+            enabled origins are selected; for some edge cases, we might want the
+            disabled ones instead.
+          lister_uuid: If set, only select origins listed by the lister with this uuid
           timestamp: the mocked timestamp at which we're recording that the visits are
             being scheduled (defaults to the current time)
           scheduled_cooldown: the minimal interval before which we can schedule
@@ -420,6 +426,7 @@ class SchedulerInterface(Protocol):
             not_found origin
           tablesample: the percentage of the table on which we run the query
             (None: no sampling)
+
         """
         ...
 
-- 
GitLab