Skip to content
Snippets Groups Projects
Commit ed908fef authored by Antoine R. Dumont
Browse files

sql/archiver/schema: Filter unknown sha1s from content_archive endpoint

parent 7b463c6c
No related branches found
No related tags found
No related merge requests found
...@@ -29,3 +29,20 @@ end ...@@ -29,3 +29,20 @@ end
$$; $$;
COMMENT ON FUNCTION swh_content_archive_missing(text) IS 'Filter missing data from a specific backend'; COMMENT ON FUNCTION swh_content_archive_missing(text) IS 'Filter missing data from a specific backend';
-- Return every content_id found in tmp_content_archive that has no
-- matching row in content_archive.
CREATE OR REPLACE FUNCTION swh_content_archive_unknown()
    RETURNS SETOF sha1
    LANGUAGE plpgsql
AS $$
BEGIN
    RETURN QUERY
        SELECT staged.content_id
        FROM tmp_content_archive AS staged
        WHERE NOT EXISTS (
            -- Anti-join: keep only the staged ids without a known counterpart.
            SELECT 1
            FROM content_archive AS known
            WHERE known.content_id = staged.content_id
        );
END
$$;

COMMENT ON FUNCTION swh_content_archive_unknown() IS 'Retrieve list of unknown sha1s';
-- SWH DB schema upgrade
-- from_version: 4
-- to_version: 5
-- description: List unknown sha1s from content_archive
-- Record schema version 5 in the dbversion table, stamped with the current time.
-- NOTE(review): the stored description is 'Work In Progress', whereas the
-- header of this script describes the upgrade as "List unknown sha1s from
-- content_archive" -- confirm whether the placeholder text is intended.
INSERT INTO dbversion(version, release, description)
VALUES(5, now(), 'Work In Progress');
-- Return every content_id found in tmp_content_archive that has no
-- matching row in content_archive.
create or replace function swh_content_archive_unknown()
    returns setof sha1
    language plpgsql
as $$
begin
    return query
        select tmp.content_id
        from tmp_content_archive tmp
        where not exists (
            -- Anti-join: keep only the ids without a row in content_archive.
            select 1
            from content_archive c
            where tmp.content_id = c.content_id
        );
end
$$;

-- Comment text aligned with the base schema definition of this function
-- ('sha1s', plural) so that upgraded and freshly created databases carry
-- the same function description.
COMMENT ON FUNCTION swh_content_archive_unknown() IS 'Retrieve list of unknown sha1s';
...@@ -196,6 +196,14 @@ class ArchiverDb(BaseDb): ...@@ -196,6 +196,14 @@ class ArchiverDb(BaseDb):
(backend_name,)) (backend_name,))
yield from cursor_to_bytes(cur) yield from cursor_to_bytes(cur)
def content_archive_get_unknown(self, cur=None):
    """Yield the rows returned by swh_content_archive_unknown().

    Args:
        cur: optional cursor; it is handed to self._cursor() to obtain
            the cursor the query is executed on.

    Yields:
        The result rows, passed through cursor_to_bytes.

    """
    query = 'select * from swh_content_archive_unknown()'
    cursor = self._cursor(cur)
    cursor.execute(query)
    yield from cursor_to_bytes(cursor)
def content_archive_insert(self, content_id, source, status, cur=None): def content_archive_insert(self, content_id, source, status, cur=None):
"""Insert a new entry in the db for the content_id. """Insert a new entry in the db for the content_id.
......
...@@ -98,14 +98,14 @@ class ArchiverStorage(): ...@@ -98,14 +98,14 @@ class ArchiverStorage():
@db_transaction_generator @db_transaction_generator
def content_archive_get_missing(self, content_ids, backend_name, cur=None): def content_archive_get_missing(self, content_ids, backend_name, cur=None):
"""Retrieve the list of missing copies from source_name. """Retrieve missing sha1s from source_name.
Args: Args:
content_ids ([sha1s]): list of sha1s to test content_ids ([sha1s]): list of sha1s to test
source_name (str): Name of the backend to check for content source_name (str): Name of the backend to check for content
Yields: Yields:
List of ids effectively missing from backend_name missing sha1s from backend_name
""" """
db = self.db db = self.db
...@@ -117,6 +117,26 @@ class ArchiverStorage(): ...@@ -117,6 +117,26 @@ class ArchiverStorage():
for content_id in db.content_archive_get_missing(backend_name, cur): for content_id in db.content_archive_get_missing(backend_name, cur):
yield content_id[0] yield content_id[0]
@db_transaction_generator
def content_archive_get_unknown(self, content_ids, cur=None):
    """List the sha1s that content_archive does not know about.

    The candidate ids are first copied into tmp_content_archive, then
    the db layer is asked which of them are unknown.

    Args:
        content_ids ([sha1s]): list of sha1s to test
        cur: cursor forwarded to the db calls (presumably supplied by
            the db_transaction_generator decorator -- TODO confirm)

    Yields:
        Unknown sha1s from content_archive

    """
    archiver_db = self.db

    # NOTE(review): unlike the two calls below, this one is not given
    # `cur` -- confirm that this is intended.
    archiver_db.mktemp_content_archive()
    archiver_db.copy_to(
        content_ids, 'tmp_content_archive', ['content_id'], cur)

    unknown_rows = archiver_db.content_archive_get_unknown(cur)
    for row in unknown_rows:
        # Each row is indexable; its first element is the content id.
        yield row[0]
@db_transaction @db_transaction
def content_archive_update(self, content_id, archive_id, def content_archive_update(self, content_id, archive_id,
new_status=None, cur=None): new_status=None, cur=None):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment