diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py
index dd3295c62dd1c6e04afe266d285321f2520246c0..852ab174e43069eb1a4450ffd6221fddff3f7265 100644
--- a/swh/lister/pattern.py
+++ b/swh/lister/pattern.py
@@ -337,21 +337,23 @@ class Lister(Generic[StateType, PageType]):
         pass
 
     def send_origins(self, origins: Iterable[model.ListedOrigin]) -> List[str]:
-        """Record a list of :class:`model.ListedOrigin` in the scheduler.
+        """Record the stream of valid :class:`model.ListedOrigin` in the scheduler.
+
+        This will filter out invalid urls prior to record origins to the scheduler.
 
         Returns:
           the list of origin URLs recorded in scheduler database
         """
-        valid_origins = []
-        for origin in origins:
-            if is_valid_origin_url(origin.url):
-                valid_origins.append(origin)
-            else:
-                logger.warning("Skipping invalid origin: %s", origin.url)
-
         recorded_origins = []
-        for batch_origins in grouper(valid_origins, n=1000):
-            ret = self.scheduler.record_listed_origins(batch_origins)
+        for origins in grouper(origins, n=1000):
+            valid_origins = []
+            for origin in origins:
+                if is_valid_origin_url(origin.url):
+                    valid_origins.append(origin)
+                else:
+                    logger.warning("Skipping invalid origin: %s", origin.url)
+
+            ret = self.scheduler.record_listed_origins(valid_origins)
             recorded_origins += [origin.url for origin in ret]
 
         return recorded_origins