Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-loader-core
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Platform
Development
swh-loader-core
Commits
6ee5a695
Commit
6ee5a695
authored
3 months ago
by
Renaud Boyer
Browse files
Options
Downloads
Patches
Plain Diff
deposit: Include past releases in snapshots
parent
89659947
No related branches found
Branches containing commit
Tags
v5.19.0
Tags containing commit
1 merge request
!535
deposit: Include past releases in snapshots
Pipeline
#12270
passed
3 months ago
Stage: external
Changes
2
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
swh/loader/package/deposit/loader.py
+180
-37
180 additions, 37 deletions
swh/loader/package/deposit/loader.py
swh/loader/package/deposit/tests/test_deposit.py
+164
-27
164 additions, 27 deletions
swh/loader/package/deposit/tests/test_deposit.py
with
344 additions
and
64 deletions
swh/loader/package/deposit/loader.py
+
180
−
37
View file @
6ee5a695
...
...
@@ -5,8 +5,10 @@
import
datetime
from
datetime
import
timezone
from
functools
import
lru_cache
import
json
import
logging
import
re
from
typing
import
Any
,
Dict
,
Iterator
,
List
,
Mapping
,
Optional
,
Sequence
,
Tuple
,
Union
import
attr
...
...
@@ -15,13 +17,13 @@ import sentry_sdk
from
swh.core.config
import
load_from_envvar
from
swh.loader.core.loader
import
DEFAULT_CONFIG
from
swh.loader.core.utils
import
cached_method
,
download
from
swh.loader.core.utils
import
download
from
swh.loader.package.loader
import
(
BasePackageInfo
,
PackageLoader
,
RawExtrinsicMetadataCore
,
)
from
swh.model.hashutil
import
hash_to_bytes
,
hash_to_hex
from
swh.model.hashutil
import
hash_to_hex
from
swh.model.model
import
(
MetadataAuthority
,
MetadataAuthorityType
,
...
...
@@ -30,18 +32,41 @@ from swh.model.model import (
Person
,
Release
,
Sha1Git
,
Snapshot
,
TimestampWithTimezone
,
)
from
swh.storage.algos.snapshot
import
snapshot_get_all_branches
from
swh.storage.interface
import
StorageInterface
logger
=
logging
.
getLogger
(
__name__
)
DepositId
=
Union
[
int
,
str
]
def
now
()
->
datetime
.
datetime
:
return
datetime
.
datetime
.
now
(
tz
=
timezone
.
utc
)
def
build_branch_name
(
version
:
str
)
->
str
:
"""
Build a branch name from a version number.
There is no
"
branch name
"
concept in a deposit, so we artificially create a name
by prefixing the slugified version number of the repository with `deposit/`.
This could lead to duplicate branch names; if you need a unique branch name, use
the ``generate_branch_name`` method of the loader, as it keeps track of the branch
names previously issued.
Args:
version: a version number
Returns:
A branch name
"""
version
=
re
.
sub
(
r
"
[^\w\s\.-]
"
,
""
,
version
.
lower
())
version
=
re
.
sub
(
r
"
[-\s]+
"
,
"
-
"
,
version
).
strip
(
"
-_.
"
)
return
f
"
deposit/
{
version
}
"
@attr.s
class
DepositPackageInfo
(
BasePackageInfo
):
filename
=
attr
.
ib
(
type
=
str
)
# instead of Optional[str]
...
...
@@ -124,6 +149,8 @@ class DepositLoader(PackageLoader[DepositPackageInfo]):
self
.
deposit_id
=
deposit_id
self
.
client
=
deposit_client
self
.
default_filename
=
default_filename
# Keeps track of the branch names {version: branch_name} to avoid collisions
self
.
_branches_names
:
dict
[
str
,
str
]
=
dict
()
@classmethod
def
from_configfile
(
cls
,
**
kwargs
:
Any
):
...
...
@@ -141,12 +168,64 @@ class DepositLoader(PackageLoader[DepositPackageInfo]):
return
cls
.
from_config
(
deposit_client
=
deposit_client
,
**
config
)
def
get_versions
(
self
)
->
Sequence
[
str
]:
# only 1 branch 'HEAD' with no alias since we only have 1 snapshot
# branch
return
[
"
HEAD
"
]
"""
A list of versions from the list of releases.
Returns:
A list of versions
"""
return
[
r
[
"
software_version
"
]
for
r
in
self
.
client
.
releases_get
(
self
.
deposit_id
)
]
def
get_default_version
(
self
)
->
str
:
"""
The default version is the latest release.
Returns:
A version number
"""
return
self
.
get_versions
()[
-
1
]
def
generate_branch_name
(
self
,
version
:
str
)
->
str
:
"""
Generate a unique branch name from a version number.
Previously generated branch names are stored in the ``_branches_names`` attribute.
If ``version`` leads to a non unique branch name for this deposit we add a `/n`
suffix to the branch name, where `n` is a number.
Example:
loader.generate_branch_name(
"
ABC
"
)
# returns
"
deposit/abc
"
loader.generate_branch_name(
"
abc
"
)
# returns
"
deposit/abc/1
"
loader.generate_branch_name(
"
a$b$c
"
)
# returns
"
deposit/abc/2
"
loader.generate_branch_name(
"
def
"
)
# returns
"
deposit/def
"
Args:
version: a version number
Returns:
A unique branch name
"""
initial_branch_name
=
unique_branch_name
=
build_branch_name
(
version
)
counter
=
0
while
unique_branch_name
in
self
.
_branches_names
.
values
():
counter
+=
1
unique_branch_name
=
f
"
{
initial_branch_name
}
/
{
counter
}
"
self
.
_branches_names
[
version
]
=
unique_branch_name
return
unique_branch_name
def
get_default_branch_name
(
self
)
->
str
:
"""
The branch name of the default version.
Returns:
A branch name
"""
return
self
.
_branches_names
[
self
.
get_default_version
()]
def
get_metadata_authority
(
self
)
->
MetadataAuthority
:
provider
=
self
.
metadata
(
)[
"
provider
"
]
provider
=
self
.
client
.
metadata
_get
(
self
.
deposit_id
)[
"
provider
"
]
assert
provider
[
"
provider_type
"
]
==
MetadataAuthorityType
.
DEPOSIT_CLIENT
.
value
return
MetadataAuthority
(
type
=
MetadataAuthorityType
.
DEPOSIT_CLIENT
,
...
...
@@ -158,7 +237,7 @@ class DepositLoader(PackageLoader[DepositPackageInfo]):
)
def
get_metadata_fetcher
(
self
)
->
MetadataFetcher
:
tool
=
self
.
metadata
(
)[
"
tool
"
]
tool
=
self
.
client
.
metadata
_get
(
self
.
deposit_id
)[
"
tool
"
]
return
MetadataFetcher
(
name
=
tool
[
"
name
"
],
version
=
tool
[
"
version
"
],
...
...
@@ -168,19 +247,38 @@ class DepositLoader(PackageLoader[DepositPackageInfo]):
def
get_package_info
(
self
,
version
:
str
)
->
Iterator
[
Tuple
[
str
,
DepositPackageInfo
]]:
"""
Get package info
First we look for the release whose software version matches ``version``, then we fetch metadata
from the deposit server and build DepositPackageInfo with it.
Args:
version: a version number.
Yields:
Package infos.
"""
deposit
=
next
(
d
for
d
in
self
.
client
.
releases_get
(
self
.
deposit_id
)
if
d
[
"
software_version
"
]
==
version
)
p_info
=
DepositPackageInfo
.
from_metadata
(
self
.
metadata
(
),
url
=
self
.
origin
.
url
,
self
.
client
.
metadata
_get
(
deposit
[
"
id
"
]
),
url
=
deposit
[
"
origin
_
url
"
]
,
filename
=
self
.
default_filename
,
version
=
version
,
version
=
deposit
[
"
software_
version
"
]
,
)
yield
"
HEAD
"
,
p_info
yield
self
.
generate_branch_name
(
version
),
p_info
def
download_package
(
self
,
p_info
:
DepositPackageInfo
,
tmpdir
:
str
)
->
List
[
Tuple
[
str
,
Mapping
]]:
"""
Override to allow use of the dedicated deposit client
"""
return
[
self
.
client
.
archive_get
(
self
.
deposit_
id
,
tmpdir
,
p_info
.
filename
)]
return
[
self
.
client
.
archive_get
(
p_info
.
id
,
tmpdir
,
p_info
.
filename
)]
def
build_release
(
self
,
...
...
@@ -209,7 +307,7 @@ class DepositLoader(PackageLoader[DepositPackageInfo]):
)
def
get_extrinsic_origin_metadata
(
self
)
->
List
[
RawExtrinsicMetadataCore
]:
metadata
=
self
.
metadata
(
)
metadata
=
self
.
client
.
metadata
_get
(
self
.
deposit_id
)
raw_metadata
:
str
=
metadata
[
"
raw_metadata
"
]
origin_metadata
=
json
.
dumps
(
{
...
...
@@ -231,16 +329,11 @@ class DepositLoader(PackageLoader[DepositPackageInfo]):
),
]
@cached_method
def
metadata
(
self
):
"""
Returns metadata from the deposit server
"""
return
self
.
client
.
metadata_get
(
self
.
deposit_id
)
def
load
(
self
)
->
Dict
:
# First make sure the deposit is known on the deposit's RPC server
# prior to triggering a loading
try
:
self
.
metadata
(
)
self
.
client
.
metadata
_get
(
self
.
deposit_id
)
except
ValueError
:
logger
.
exception
(
f
"
Unknown deposit
{
self
.
deposit_id
}
"
)
sentry_sdk
.
capture_exception
()
...
...
@@ -250,9 +343,15 @@ class DepositLoader(PackageLoader[DepositPackageInfo]):
return
super
().
load
()
def
finalize_visit
(
self
,
status_visit
:
str
,
errors
:
Optional
[
List
[
str
]]
=
None
,
**
kwargs
self
,
status_visit
:
str
,
snapshot
:
Optional
[
Snapshot
],
errors
:
Optional
[
List
[
str
]]
=
None
,
**
kwargs
,
)
->
Dict
[
str
,
Any
]:
r
=
super
().
finalize_visit
(
status_visit
=
status_visit
,
**
kwargs
)
r
=
super
().
finalize_visit
(
status_visit
=
status_visit
,
snapshot
=
snapshot
,
**
kwargs
)
success
=
status_visit
==
"
full
"
# Update deposit status
...
...
@@ -265,17 +364,30 @@ class DepositLoader(PackageLoader[DepositPackageInfo]):
)
return
r
snapshot_id
=
hash_to_bytes
(
r
[
"
snapshot_id
"
])
snapshot
=
snapshot_get_all_branches
(
self
.
storage
,
snapshot_id
)
if
not
snapshot
:
logger
.
error
(
"
No snapshot provided while finalizing deposit %d
"
,
self
.
deposit_id
,
)
return
r
branches
=
snapshot
.
branches
logger
.
debug
(
"
branches: %s
"
,
branches
)
if
not
branches
:
return
r
rel_id
=
branches
[
b
"
HEAD
"
].
target
default_branch_name
=
self
.
get_default_branch_name
()
branch_by_name
=
branches
[
default_branch_name
.
encode
()]
if
not
branch_by_name
or
not
branch_by_name
.
target
:
logger
.
error
(
"
Unable to get branch %s for deposit %d
"
,
default_branch_name
,
self
.
deposit_id
,
)
return
r
rel_id
=
branch_by_name
.
target
release
=
self
.
storage
.
release_get
([
rel_id
])[
0
]
if
not
release
:
return
r
...
...
@@ -329,28 +441,59 @@ class ApiClient:
return
method_fn
(
url
,
*
args
,
**
kwargs
)
def
archive_get
(
self
,
deposit_id
:
Union
[
int
,
str
]
,
tmpdir
:
str
,
filename
:
str
self
,
deposit_id
:
DepositId
,
tmpdir
:
str
,
filename
:
str
)
->
Tuple
[
str
,
Dict
]:
"""
Retrieve deposit
'
s archive artifact locally
"""
url
=
f
"
{
self
.
base_url
}
/
{
deposit_id
}
/raw/
"
return
download
(
url
,
dest
=
tmpdir
,
filename
=
filename
,
auth
=
self
.
auth
)
def
metadata_url
(
self
,
deposit_id
:
Union
[
int
,
str
])
->
str
:
return
f
"
{
self
.
base_url
}
/
{
deposit_id
}
/meta/
"
@lru_cache
def
metadata_get
(
self
,
deposit_id
:
DepositId
)
->
Dict
[
str
,
Any
]:
"""
Retrieve deposit
'
s metadata artifact as json
The result of this API call is cached.
Args:
deposit_id: a deposit id
Returns:
A dict of metadata
Raises:
ValueError: something went wrong with the metadata API.
"""
response
=
self
.
do
(
"
get
"
,
f
"
{
self
.
base_url
}
/
{
deposit_id
}
/meta/
"
)
if
not
response
.
ok
:
raise
ValueError
(
f
"
Problem when retrieving deposit metadata at
{
response
.
url
}
"
)
return
response
.
json
()
@lru_cache
def
releases_get
(
self
,
deposit_id
:
DepositId
)
->
List
[
Dict
[
str
,
Any
]]:
"""
Retrieve the list of releases related to this deposit.
The result of this API call is cached.
Args:
deposit_id: a deposit id
def
metadata_get
(
self
,
deposit_id
:
Union
[
int
,
str
])
->
Dict
[
str
,
Any
]:
"""
Retrieve deposit
'
s metadata artifact as json
"""
url
=
self
.
metadata_url
(
deposit_id
)
r
=
self
.
do
(
"
get
"
,
url
)
if
r
.
ok
:
return
r
.
json
()
Returns:
A list of deposits
msg
=
f
"
Problem when retrieving deposit metadata at
{
url
}
"
raise
ValueError
(
msg
)
Raises:
ValueError: something went wrong with the releases API.
"""
response
=
self
.
do
(
"
get
"
,
f
"
{
self
.
base_url
}
/
{
deposit_id
}
/releases/
"
)
if
not
response
.
ok
:
raise
ValueError
(
f
"
Problem when retrieving deposit releases at
{
response
.
url
}
"
)
return
response
.
json
()
def
status_update
(
self
,
deposit_id
:
Union
[
int
,
str
]
,
deposit_id
:
DepositId
,
status
:
str
,
errors
:
Optional
[
List
[
str
]]
=
None
,
release_id
:
Optional
[
str
]
=
None
,
...
...
This diff is collapsed.
Click to expand it.
swh/loader/package/deposit/tests/test_deposit.py
+
164
−
27
View file @
6ee5a695
# Copyright (C) 2019-202
1
The Software Heritage developers
# Copyright (C) 2019-202
4
The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
...
...
@@ -10,7 +10,11 @@ import re
import
pytest
from
swh.core.pytest_plugin
import
requests_mock_datadir_factory
from
swh.loader.package.deposit.loader
import
ApiClient
,
DepositLoader
from
swh.loader.package.deposit.loader
import
(
ApiClient
,
DepositLoader
,
build_branch_name
,
)
from
swh.loader.package.loader
import
now
from
swh.loader.tests
import
assert_last_visit_matches
,
check_snapshot
,
get_stats
from
swh.model.hashutil
import
hash_to_bytes
,
hash_to_hex
...
...
@@ -107,13 +111,18 @@ requests_mock_datadir_missing_one = requests_mock_datadir_factory(
def
test_deposit_loading_failure_to_retrieve_1_artifact
(
swh_storage
,
deposit_client
,
requests_mock_datadir_missing_one
swh_storage
,
deposit_client
,
requests_mock_datadir_missing_one
,
requests_mock
):
"""
Deposit with missing artifact ends up with an uneventful/partial visit
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url
=
"
some-url-2
"
deposit_id
=
666
requests_mock_datadir_missing_one
.
put
(
re
.
compile
(
"
https
"
))
releases
=
[{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
1
"
,
"
origin_url
"
:
url
}]
requests_mock_datadir_missing_one
.
get
(
f
"
{
DEPOSIT_URL
}
/
{
deposit_id
}
/releases/
"
,
json
=
releases
)
loader
=
DepositLoader
(
swh_storage
,
url
,
deposit_id
,
deposit_client
,
default_filename
=
"
archive.zip
"
)
...
...
@@ -151,7 +160,7 @@ def test_deposit_loading_failure_to_retrieve_1_artifact(
"
status
"
:
"
failed
"
,
"
status_detail
"
:
{
"
loading
"
:
[
"
Failed to load branch
HEAD
for some-url-2: 404 Client Error: None
"
"
Failed to load branch
deposit/1
for some-url-2: 404 Client Error: None
"
"
for url: https://deposit.softwareheritage.org/1/private/666/raw/
"
]
},
...
...
@@ -163,12 +172,14 @@ def test_deposit_loading_failure_to_retrieve_1_artifact(
def
test_deposit_loading_ok
(
swh_storage
,
deposit_client
,
requests_mock_datadir
):
url
=
"
https://hal-test.archives-ouvertes.fr/some-external-id
"
deposit_id
=
666
releases
=
[{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
1
"
,
"
origin_url
"
:
url
}]
requests_mock_datadir
.
get
(
f
"
{
DEPOSIT_URL
}
/
{
deposit_id
}
/releases/
"
,
json
=
releases
)
loader
=
DepositLoader
(
swh_storage
,
url
,
deposit_id
,
deposit_client
,
default_filename
=
"
archive.zip
"
)
actual_load_status
=
loader
.
load
()
expected_snapshot_id
=
"
338b45d87e02fb5cbf324694bc4a898623d6a30f
"
expected_snapshot_id
=
"
28d6c5f69c4022a359203de8e2e81bda103b148e
"
assert
actual_load_status
==
{
"
status
"
:
"
eventful
"
,
"
snapshot_id
"
:
expected_snapshot_id
,
...
...
@@ -182,21 +193,27 @@ def test_deposit_loading_ok(swh_storage, deposit_client, requests_mock_datadir):
snapshot
=
hash_to_bytes
(
expected_snapshot_id
),
)
release_id_hex
=
"
2566a64a27bc00362e265be9666d7606750530a1
"
release_id
=
hash_to_bytes
(
release_id_hex
)
release_id_
1_
hex
=
"
c98c19f43ef10a4262345d8e85ca283cea99c7b3
"
release_id
_1
=
hash_to_bytes
(
release_id_
1_
hex
)
expected_snapshot
=
Snapshot
(
id
=
hash_to_bytes
(
expected_snapshot_id
),
branches
=
{
b
"
HEAD
"
:
SnapshotBranch
(
target
=
release_id
,
target
=
b
"
deposit/1
"
,
target_type
=
SnapshotTargetType
.
ALIAS
,
),
b
"
deposit/1
"
:
SnapshotBranch
(
target
=
release_id_1
,
target_type
=
SnapshotTargetType
.
RELEASE
,
),
},
)
check_snapshot
(
expected_snapshot
,
storage
=
loader
.
storage
)
release
=
loader
.
storage
.
release_get
([
release_id
])[
0
]
release
=
loader
.
storage
.
release_get
([
release_id_1
])[
0
]
date
=
TimestampWithTimezone
.
from_datetime
(
datetime
.
datetime
(
2017
,
10
,
7
,
15
,
17
,
8
,
tzinfo
=
datetime
.
timezone
.
utc
)
)
...
...
@@ -205,9 +222,10 @@ def test_deposit_loading_ok(swh_storage, deposit_client, requests_mock_datadir):
name
=
b
"
Software Heritage
"
,
email
=
b
"
robot@softwareheritage.org
"
,
)
assert
release
==
Release
(
id
=
release_id
,
name
=
b
"
HEAD
"
,
id
=
release_id
_1
,
name
=
b
"
1
"
,
message
=
b
"
hal: Deposit 666 in collection hal
\n
"
,
author
=
person
,
date
=
date
,
...
...
@@ -270,7 +288,7 @@ def test_deposit_loading_ok(swh_storage, deposit_client, requests_mock_datadir):
body
=
update_query
.
json
()
expected_body
=
{
"
status
"
:
"
done
"
,
"
release_id
"
:
release_id_hex
,
"
release_id
"
:
release_id_
1_
hex
,
"
directory_id
"
:
hash_to_hex
(
release
.
target
),
"
snapshot_id
"
:
expected_snapshot_id
,
"
origin_url
"
:
url
,
...
...
@@ -296,12 +314,16 @@ def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir
external_id
=
"
some-external-id
"
url
=
f
"
https://hal-test.archives-ouvertes.fr/
{
external_id
}
"
deposit_id
=
777
releases
=
[{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
1
"
,
"
origin_url
"
:
url
}]
requests_mock_datadir
.
get
(
f
"
{
DEPOSIT_URL
}
/
{
deposit_id
}
/releases/
"
,
json
=
releases
)
loader
=
DepositLoader
(
swh_storage
,
url
,
deposit_id
,
deposit_client
,
default_filename
=
"
archive.zip
"
)
actual_load_status
=
loader
.
load
()
expected_snapshot_id
=
"
3449b8ff31abeacefd33cca60e3074c1649dc3a1
"
expected_snapshot_id
=
"
ee5789e4f8f5ebde20b1b9e6a7338781d4f65c9b
"
assert
actual_load_status
==
{
"
status
"
:
"
eventful
"
,
...
...
@@ -315,13 +337,23 @@ def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir
snapshot
=
hash_to_bytes
(
expected_snapshot_id
),
)
release_id
=
"
ba6c9a59ae3256e765d32b211cc183dc2380aed7
"
release_id_head_hex
=
"
6465706f7369742f31
"
release_id_head
=
hash_to_bytes
(
release_id_head_hex
)
release_id_1_hex
=
"
3b7f58c924063e1dc4976e8fb8e5503592fddedd
"
release_id_1
=
hash_to_bytes
(
release_id_1_hex
)
expected_snapshot
=
Snapshot
(
id
=
hash_to_bytes
(
expected_snapshot_id
),
branches
=
{
b
"
HEAD
"
:
SnapshotBranch
(
target
=
hash_to_bytes
(
release_id
),
target_type
=
SnapshotTargetType
.
RELEASE
)
target
=
release_id_head
,
target_type
=
SnapshotTargetType
.
ALIAS
,
),
b
"
deposit/1
"
:
SnapshotBranch
(
target
=
release_id_1
,
target_type
=
SnapshotTargetType
.
RELEASE
,
),
},
)
...
...
@@ -331,7 +363,7 @@ def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir
# Ensure the date fields are set appropriately in the release
# Retrieve the release
release
=
loader
.
storage
.
release_get
([
hash_to_bytes
(
release_id
)])[
0
]
release
=
loader
.
storage
.
release_get
([
hash_to_bytes
(
release_id
_1
)])[
0
]
assert
release
# swh-deposit uses the numeric 'offset_minutes' instead of the bytes offset
# attribute, because its dates are always well-formed, and it can only send
...
...
@@ -422,7 +454,7 @@ def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir
assert
len
(
actual_directory_metadata
.
results
)
==
1
release_swhid
=
CoreSWHID
(
object_type
=
ObjectType
.
RELEASE
,
object_id
=
hash_to_bytes
(
release_id
)
object_type
=
ObjectType
.
RELEASE
,
object_id
=
hash_to_bytes
(
release_id
_1
)
)
dir_metadata_template
=
RawExtrinsicMetadata
(
target
=
directory_swhid
,
...
...
@@ -469,7 +501,7 @@ def test_deposit_loading_ok_2(swh_storage, deposit_client, requests_mock_datadir
body
=
update_query
.
json
()
expected_body
=
{
"
status
"
:
"
done
"
,
"
release_id
"
:
release_id
,
"
release_id
"
:
release_id
_1_hex
,
"
directory_id
"
:
hash_to_hex
(
release
.
target
),
"
snapshot_id
"
:
expected_snapshot_id
,
"
origin_url
"
:
url
,
...
...
@@ -487,10 +519,14 @@ def test_deposit_loading_ok_3(swh_storage, deposit_client, requests_mock_datadir
external_id
=
"
hal-123456
"
url
=
f
"
https://hal-test.archives-ouvertes.fr/
{
external_id
}
"
deposit_id
=
888
releases
=
[{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
1
"
,
"
origin_url
"
:
url
}]
requests_mock_datadir
.
get
(
f
"
{
DEPOSIT_URL
}
/
{
deposit_id
}
/releases/
"
,
json
=
releases
)
loader
=
DepositLoader
(
swh_storage
,
url
,
deposit_id
,
deposit_client
)
actual_load_status
=
loader
.
load
()
expected_snapshot_id
=
"
4677843de89e398f1d6bfedc9ca9b89c451c55c8
"
expected_snapshot_id
=
"
2f95506d6194e6c4e71ba87e3b118c65a767fe9d
"
assert
actual_load_status
==
{
"
status
"
:
"
eventful
"
,
...
...
@@ -510,12 +546,15 @@ def test_deposit_loading_ok_release_notes(
):
url
=
"
https://hal-test.archives-ouvertes.fr/some-external-id
"
deposit_id
=
999
releases
=
[{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
1
"
,
"
origin_url
"
:
url
}]
requests_mock_datadir
.
get
(
f
"
{
DEPOSIT_URL
}
/
{
deposit_id
}
/releases/
"
,
json
=
releases
)
loader
=
DepositLoader
(
swh_storage
,
url
,
deposit_id
,
deposit_client
,
default_filename
=
"
archive.zip
"
)
actual_load_status
=
loader
.
load
()
expected_snapshot_id
=
"
a307acffb7c29bebb3daf1bcb680bb3f452890a8
"
expected_snapshot_id
=
"
41cd91cb190ffa82fee8ec5d91dedc5d57fb3b1f
"
assert
actual_load_status
==
{
"
status
"
:
"
eventful
"
,
"
snapshot_id
"
:
expected_snapshot_id
,
...
...
@@ -529,21 +568,26 @@ def test_deposit_loading_ok_release_notes(
snapshot
=
hash_to_bytes
(
expected_snapshot_id
),
)
release_id_hex
=
"
f5e8ec02ede57edbe061afa7fc2a07bb7d14a700
"
release_id
=
hash_to_bytes
(
release_id_hex
)
release_id_
1_
hex
=
"
a7cae4b6aaaf70f30178d86496aefb7dead0eb77
"
release_id
_1
=
hash_to_bytes
(
release_id_
1_
hex
)
expected_snapshot
=
Snapshot
(
id
=
hash_to_bytes
(
expected_snapshot_id
),
branches
=
{
b
"
HEAD
"
:
SnapshotBranch
(
target
=
release_id
,
target
=
b
"
deposit/1
"
,
target_type
=
SnapshotTargetType
.
ALIAS
,
),
b
"
deposit/1
"
:
SnapshotBranch
(
target
=
release_id_1
,
target_type
=
SnapshotTargetType
.
RELEASE
,
),
},
)
check_snapshot
(
expected_snapshot
,
storage
=
loader
.
storage
)
release
=
loader
.
storage
.
release_get
([
release_id
])[
0
]
release
=
loader
.
storage
.
release_get
([
release_id
_1
])[
0
]
date
=
TimestampWithTimezone
.
from_datetime
(
datetime
.
datetime
(
2017
,
10
,
7
,
15
,
17
,
8
,
tzinfo
=
datetime
.
timezone
.
utc
)
)
...
...
@@ -553,8 +597,8 @@ def test_deposit_loading_ok_release_notes(
email
=
b
"
robot@softwareheritage.org
"
,
)
assert
release
==
Release
(
id
=
release_id
,
name
=
b
"
HEAD
"
,
id
=
release_id
_1
,
name
=
b
"
1
"
,
message
=
(
b
"
hal: Deposit 999 in collection hal
\n\n
This release adds this and that.
\n
"
),
...
...
@@ -565,3 +609,96 @@ def test_deposit_loading_ok_release_notes(
synthetic
=
True
,
metadata
=
None
,
)
def
test_deposit_get_versions
(
swh_storage
,
deposit_client
,
requests_mock_datadir
):
external_id
=
"
hal-123456
"
url
=
f
"
https://hal-test.archives-ouvertes.fr/
{
external_id
}
"
deposit_id
=
888
releases
=
[
{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
version 1
"
,
"
origin_url
"
:
url
},
{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
version 2
"
,
"
origin_url
"
:
url
},
]
requests_mock_datadir
.
get
(
f
"
{
DEPOSIT_URL
}
/
{
deposit_id
}
/releases/
"
,
json
=
releases
)
loader
=
DepositLoader
(
swh_storage
,
url
,
deposit_id
,
deposit_client
)
loader
.
load
()
assert
loader
.
get_versions
()
==
[
"
version 1
"
,
"
version 2
"
]
assert
loader
.
get_default_version
()
==
"
version 2
"
def
test_deposit_deduplicate_branch_names
(
swh_storage
,
deposit_client
,
requests_mock_datadir
):
external_id
=
"
hal-123456
"
url
=
f
"
https://hal-test.archives-ouvertes.fr/
{
external_id
}
"
deposit_id
=
888
releases
=
[
{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
ABC
"
,
"
origin_url
"
:
url
},
{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
abc
"
,
"
origin_url
"
:
url
},
{
"
id
"
:
deposit_id
,
"
software_version
"
:
"
a$B$c$
"
,
"
origin_url
"
:
url
},
]
requests_mock_datadir
.
get
(
f
"
{
DEPOSIT_URL
}
/
{
deposit_id
}
/releases/
"
,
json
=
releases
)
loader
=
DepositLoader
(
swh_storage
,
url
,
deposit_id
,
deposit_client
)
status
=
loader
.
load
()
# unique branch names for each version
snapshot
=
loader
.
storage
.
snapshot_get
(
hash_to_bytes
(
status
[
"
snapshot_id
"
]))
assert
len
(
snapshot
[
"
branches
"
])
==
4
assert
snapshot
[
"
branches
"
][
b
"
deposit/abc
"
]
assert
snapshot
[
"
branches
"
][
b
"
deposit/abc/1
"
]
assert
snapshot
[
"
branches
"
][
b
"
deposit/abc/2
"
]
assert
snapshot
[
"
branches
"
][
b
"
HEAD
"
][
"
target
"
]
==
b
"
deposit/abc/2
"
# the deposit will be updated with the right release_id
release
=
loader
.
storage
.
release_get
(
[
snapshot
[
"
branches
"
][
b
"
deposit/abc/2
"
][
"
target
"
]]
)[
0
]
urls
=
[
m
for
m
in
requests_mock_datadir
.
request_history
if
m
.
url
==
f
"
{
DEPOSIT_URL
}
/
{
deposit_id
}
/update/
"
]
assert
len
(
urls
)
==
1
update_query
=
urls
[
0
]
assert
update_query
.
json
()[
"
release_id
"
]
==
hash_to_hex
(
release
.
id
)
@pytest.mark.parametrize
(
"
version,expected
"
,
[
(
"
0
"
,
"
deposit/0
"
),
(
"
Weird version Number
"
,
"
deposit/weird-version-number
"
),
(
"
trailing-
"
,
"
deposit/trailing
"
),
(
"
1.2.3
"
,
"
deposit/1.2.3
"
),
],
)
def
test_build_branch_name
(
version
,
expected
):
assert
build_branch_name
(
version
)
==
expected
@pytest.mark.parametrize
(
"
version,expected
"
,
[
(
"
0
"
,
"
deposit/0
"
),
(
"
Weird version Number
"
,
"
deposit/weird-version-number
"
),
(
"
trailing-
"
,
"
deposit/trailing
"
),
(
"
1.2.3
"
,
"
deposit/1.2.3
"
),
],
)
def
test_generate_branch_name
(
swh_storage
,
deposit_client
,
version
,
expected
):
loader
=
DepositLoader
(
swh_storage
,
"
test
"
,
1
,
deposit_client
)
assert
loader
.
generate_branch_name
(
version
)
==
expected
def
test_generate_branch_name_uniqueness
(
swh_storage
,
deposit_client
):
loader
=
DepositLoader
(
swh_storage
,
"
test
"
,
1
,
deposit_client
)
assert
loader
.
generate_branch_name
(
"
A
"
)
==
"
deposit/a
"
assert
loader
.
generate_branch_name
(
"
a
"
)
==
"
deposit/a/1
"
assert
loader
.
generate_branch_name
(
"
a$
"
)
==
"
deposit/a/2
"
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment