Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-archiver
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Platform
Development
swh-archiver
Commits
ed908fef
Commit
ed908fef
authored
8 years ago
by
Antoine R. Dumont
Browse files
Options
Downloads
Patches
Plain Diff
sql/archiver/schema: Filter unknown sha1s from content_archive endpoint
parent
7b463c6c
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
sql/swh-archiver-func.sql
+17
-0
17 additions, 0 deletions
sql/swh-archiver-func.sql
sql/upgrades/005.sql
+24
-0
24 additions, 0 deletions
sql/upgrades/005.sql
swh/archiver/db.py
+8
-0
8 additions, 0 deletions
swh/archiver/db.py
swh/archiver/storage.py
+22
-2
22 additions, 2 deletions
swh/archiver/storage.py
with
71 additions
and
2 deletions
sql/swh-archiver-func.sql
+
17
−
0
View file @
ed908fef
...
@@ -29,3 +29,20 @@ end
...
@@ -29,3 +29,20 @@ end
$$
;
$$
;
COMMENT
ON
FUNCTION
swh_content_archive_missing
(
text
)
IS
'Filter missing data from a specific backend'
;
COMMENT
ON
FUNCTION
swh_content_archive_missing
(
text
)
IS
'Filter missing data from a specific backend'
;
create
or
replace
function
swh_content_archive_unknown
()
returns
setof
sha1
language
plpgsql
as
$$
begin
return
query
select
content_id
from
tmp_content_archive
tmp
where
not
exists
(
select
1
from
content_archive
c
where
tmp
.
content_id
=
c
.
content_id
);
end
$$
;
COMMENT
ON
FUNCTION
swh_content_archive_unknown
()
IS
'Retrieve list of unknown sha1s'
;
This diff is collapsed.
Click to expand it.
sql/upgrades/005.sql
0 → 100644
+
24
−
0
View file @
ed908fef
-- SWH DB schema upgrade
-- from_version: 4
-- to_version: 5
-- description: List unknown sha1s from content_archive
INSERT
INTO
dbversion
(
version
,
release
,
description
)
VALUES
(
5
,
now
(),
'Work In Progress'
);
create
or
replace
function
swh_content_archive_unknown
()
returns
setof
sha1
language
plpgsql
as
$$
begin
return
query
select
content_id
from
tmp_content_archive
tmp
where
not
exists
(
select
1
from
content_archive
c
where
tmp
.
content_id
=
c
.
content_id
);
end
$$
;
COMMENT
ON
FUNCTION
swh_content_archive_unknown
()
IS
'Retrieve list of unknown sha1'
;
This diff is collapsed.
Click to expand it.
swh/archiver/db.py
+
8
−
0
View file @
ed908fef
...
@@ -196,6 +196,14 @@ class ArchiverDb(BaseDb):
...
@@ -196,6 +196,14 @@ class ArchiverDb(BaseDb):
(
backend_name
,))
(
backend_name
,))
yield
from
cursor_to_bytes
(
cur
)
yield
from
cursor_to_bytes
(
cur
)
def
content_archive_get_unknown
(
self
,
cur
=
None
):
"""
Retrieve unknown sha1 from archiver db.
"""
cur
=
self
.
_cursor
(
cur
)
cur
.
execute
(
'
select * from swh_content_archive_unknown()
'
)
yield
from
cursor_to_bytes
(
cur
)
def
content_archive_insert
(
self
,
content_id
,
source
,
status
,
cur
=
None
):
def
content_archive_insert
(
self
,
content_id
,
source
,
status
,
cur
=
None
):
"""
Insert a new entry in the db for the content_id.
"""
Insert a new entry in the db for the content_id.
...
...
This diff is collapsed.
Click to expand it.
swh/archiver/storage.py
+
22
−
2
View file @
ed908fef
...
@@ -98,14 +98,14 @@ class ArchiverStorage():
...
@@ -98,14 +98,14 @@ class ArchiverStorage():
@db_transaction_generator
@db_transaction_generator
def
content_archive_get_missing
(
self
,
content_ids
,
backend_name
,
cur
=
None
):
def
content_archive_get_missing
(
self
,
content_ids
,
backend_name
,
cur
=
None
):
"""
Retrieve
the list of
missing
copie
s from source_name.
"""
Retrieve missing
sha1
s from source_name.
Args:
Args:
content_ids ([sha1s]): list of sha1s to test
content_ids ([sha1s]): list of sha1s to test
source_name (str): Name of the backend to check for content
source_name (str): Name of the backend to check for content
Yields:
Yields:
List of ids effectively
missing from backend_name
missing
sha1s
from backend_name
"""
"""
db
=
self
.
db
db
=
self
.
db
...
@@ -117,6 +117,26 @@ class ArchiverStorage():
...
@@ -117,6 +117,26 @@ class ArchiverStorage():
for
content_id
in
db
.
content_archive_get_missing
(
backend_name
,
cur
):
for
content_id
in
db
.
content_archive_get_missing
(
backend_name
,
cur
):
yield
content_id
[
0
]
yield
content_id
[
0
]
@db_transaction_generator
def
content_archive_get_unknown
(
self
,
content_ids
,
cur
=
None
):
"""
Retrieve unknown sha1s from content_archive.
Args:
content_ids ([sha1s]): list of sha1s to test
Yields:
Unknown sha1s from content_archive
"""
db
=
self
.
db
db
.
mktemp_content_archive
()
db
.
copy_to
(
content_ids
,
'
tmp_content_archive
'
,
[
'
content_id
'
],
cur
)
for
content_id
in
db
.
content_archive_get_unknown
(
cur
):
yield
content_id
[
0
]
@db_transaction
@db_transaction
def
content_archive_update
(
self
,
content_id
,
archive_id
,
def
content_archive_update
(
self
,
content_id
,
archive_id
,
new_status
=
None
,
cur
=
None
):
new_status
=
None
,
cur
=
None
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment