From d049805b204bda31d80f161d6afc2a38a45667ba Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Thu, 15 Sep 2016 16:24:53 +0200
Subject: [PATCH] Archiver: Adapt ArchiverToBackendDirector to latest storage
 API

---
 swh/archiver/director.py | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)
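
Note (illustration only; text below the --- marker is ignored by git am):
the new read_cache_content_from_storage_by_batch leans on
swh.core.utils.grouper to cut the stream returned by cache_content_get()
into batches of batch_max_size, replacing the manual last_content/limit
pagination. A minimal, self-contained sketch of that batching pattern
follows; grouper() is an itertools-based stand-in and
fake_cache_content_get() a made-up generator, neither being the real
swh.core or swh.storage code.

    import itertools

    def grouper(iterable, n):
        # Stand-in for swh.core.utils.grouper: yield successive lists
        # of at most n items taken from the iterable.
        it = iter(iterable)
        while True:
            batch = list(itertools.islice(it, n))
            if not batch:
                return
            yield batch

    def fake_cache_content_get():
        # Dummy stream standing in for storage.cache_content_get(),
        # which the patched code now calls without arguments.
        for i in range(7):
            yield {'sha1': 'sha1-%d' % i}

    # Consume the stream by batches, as get_contents_to_archive now does.
    for contents in grouper(fake_cache_content_get(), 3):
        content_ids = [{'content_id': c['sha1']} for c in contents]
        print('batch of %d contents' % len(content_ids))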

diff --git a/swh/archiver/director.py b/swh/archiver/director.py
index df52a1d..2a27334 100644
--- a/swh/archiver/director.py
+++ b/swh/archiver/director.py
@@ -6,7 +6,7 @@
 import abc
 import click
 
-from swh.core import config
+from swh.core import config, utils
 from swh.scheduler.celery_backend.config import app
 
 from . import tasks  # noqa
@@ -177,11 +177,10 @@ class ArchiverToBackendDirector(ArchiverDirectorBase):
         self.storage = Storage(storage['dbconn'], storage['objroot'])
         self.destination_host = self.config['destination']['host']
 
-    def read_cache_content_from_storage(self, last_content=None):
-        for content in self.storage.cache_content_get(
-                last_content=last_content,
-                limit=self.config['batch_max_size']):
-            yield {'content_id': content['sha1']}
+    def read_cache_content_from_storage_by_batch(self, batch_max_size):
+        for contents in utils.grouper(self.storage.cache_content_get(),
+                                      batch_max_size):
+            yield contents
 
     def get_contents_to_archive(self):
         """Create batch of contents that needs to be archived
@@ -190,24 +189,21 @@ class ArchiverToBackendDirector(ArchiverDirectorBase):
             sha1 of content to archive
 
          """
-        last_content = None
-        while True:
-            content_ids = list(
-                self.read_cache_content_from_storage(last_content))
+        for contents in self.read_cache_content_from_storage_by_batch(
+                self.config['batch_max_size']):
+            content_ids = [{'content_id': c['sha1']} for c in contents]
 
             if not content_ids:
-                return
-
-            # Keep the last known content
-            last_content = content_ids[-1]['content_id']
+                continue
 
+            # Filter out contents already archived on the destination
             content_ids = list(
                 self.archiver_storage.content_archive_get_missing(
                     content_ids=content_ids,
                     backend_name=self.destination_host))
 
             if not content_ids:
-                return
+                continue
 
             print('Sending %s new contents for archive' % len(content_ids))
 
-- 
GitLab