diff --git a/sql/swh-archiver-func.sql b/sql/swh-archiver-func.sql
index 551026278c5be5e5a0117406d372d1b4bcd5204a..5113056375d19a5b1ba4f08e2d51ec57222a4a41 100644
--- a/sql/swh-archiver-func.sql
+++ b/sql/swh-archiver-func.sql
@@ -29,3 +29,20 @@ end
 $$;
 
 COMMENT ON FUNCTION swh_content_archive_missing(text) IS 'Filter missing data from a specific backend';
+
+create or replace function swh_content_archive_unknown()
+    returns setof sha1
+    language plpgsql
+as $$
+begin
+    -- Anti-join: ids in tmp_content_archive with no content_archive row.
+    return query
+        select candidate.content_id
+        from tmp_content_archive candidate
+        where not exists (
+            select 1 from content_archive archived
+            where archived.content_id = candidate.content_id);
+end
+$$;
+
+COMMENT ON FUNCTION swh_content_archive_unknown() IS 'Retrieve list of unknown sha1s';
diff --git a/sql/upgrades/005.sql b/sql/upgrades/005.sql
new file mode 100644
index 0000000000000000000000000000000000000000..bc50631c13de461564783e5bf3db9a61998332d6
--- /dev/null
+++ b/sql/upgrades/005.sql
@@ -0,0 +1,24 @@
+-- SWH DB schema upgrade
+-- from_version: 4
+-- to_version: 5
+-- description: List unknown sha1s from content_archive
+
+insert into dbversion (version, release, description)
+values (5, now(), 'Work In Progress');  -- registers schema version 5
+
+create or replace function swh_content_archive_unknown()
+    returns setof sha1
+    language plpgsql
+as $$
+begin
+    -- Anti-join: ids in tmp_content_archive with no content_archive row.
+    return query
+        select candidate.content_id
+        from tmp_content_archive candidate
+        where not exists (
+            select 1 from content_archive archived
+            where archived.content_id = candidate.content_id);
+end
+$$;
+
+COMMENT ON FUNCTION swh_content_archive_unknown() IS 'Retrieve list of unknown sha1s';
diff --git a/swh/archiver/db.py b/swh/archiver/db.py
index a4611d96b6d20b9dea68f0502396bcfd0e972cd9..b1156aef054144864a3a7598603800f50008ab78 100644
--- a/swh/archiver/db.py
+++ b/swh/archiver/db.py
@@ -196,6 +196,14 @@ class ArchiverDb(BaseDb):
                     (backend_name,))
         yield from cursor_to_bytes(cur)
 
+    def content_archive_get_unknown(self, cur=None):
+        """Yield swh_content_archive_unknown() rows via cursor_to_bytes.
+
+        """
+        cursor = self._cursor(cur)
+        cursor.execute('select * from swh_content_archive_unknown()')
+        yield from cursor_to_bytes(cursor)
+
     def content_archive_insert(self, content_id, source, status, cur=None):
         """Insert a new entry in the db for the content_id.
 
diff --git a/swh/archiver/storage.py b/swh/archiver/storage.py
index 1336c17afcd9cd71c06b7b4dc459d00801d8e84e..b207a704721f623bc858cb163ca8f835a18617de 100644
--- a/swh/archiver/storage.py
+++ b/swh/archiver/storage.py
@@ -98,14 +98,14 @@ class ArchiverStorage():
 
     @db_transaction_generator
     def content_archive_get_missing(self, content_ids, backend_name, cur=None):
-        """Retrieve the list of missing copies from source_name.
+        """Retrieve missing sha1s from source_name.
 
         Args:
             content_ids ([sha1s]): list of sha1s to test
             source_name (str): Name of the backend to check for content
 
         Yields:
-            List of ids effectively missing from backend_name
+            missing sha1s from backend_name
 
         """
         db = self.db
@@ -117,6 +117,26 @@ class ArchiverStorage():
         for content_id in db.content_archive_get_missing(backend_name, cur):
             yield content_id[0]
 
+    @db_transaction_generator
+    def content_archive_get_unknown(self, content_ids, cur=None):
+        """Yield the sha1s that have no entry in content_archive.
+
+        Args:
+            content_ids ([sha1s]): candidate sha1s to look up
+
+        Yields:
+            every candidate sha1 missing from content_archive
+
+        """
+        archiver_db = self.db
+        # Stage the candidates in tmp_content_archive (presumably created by
+        # mktemp_content_archive), which swh_content_archive_unknown() reads.
+        archiver_db.mktemp_content_archive()
+        archiver_db.copy_to(
+            content_ids, 'tmp_content_archive', ['content_id'], cur)
+        unknown_rows = archiver_db.content_archive_get_unknown(cur)
+        yield from (row[0] for row in unknown_rows)
+
     @db_transaction
     def content_archive_update(self, content_id, archive_id,
                                new_status=None, cur=None):