Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-loader-core
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Platform
Development
swh-loader-core
Commits
939c0f54
Verified
Commit
939c0f54
authored
5 years ago
by
Antoine R. Dumont
Browse files
Options
Downloads
Patches
Plain Diff
pypi: Add support for loading repositories with missing artifacts
parent
f382f277
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
swh/loader/package/loader.py
+20
-12
20 additions, 12 deletions
swh/loader/package/loader.py
swh/loader/package/tests/conftest.py
+4
-2
4 additions, 2 deletions
swh/loader/package/tests/conftest.py
swh/loader/package/tests/test_pypi.py
+77
-3
77 additions, 3 deletions
swh/loader/package/tests/test_pypi.py
with
101 additions
and
17 deletions
swh/loader/package/loader.py
+
20
−
12
View file @
939c0f54
...
...
@@ -168,8 +168,9 @@ class PackageLoader:
"""
status_load
=
'
uneventful
'
# either: eventful, uneventful, failed
status_visit
=
'
partial
'
# either: partial, full
status_visit
=
'
full
'
# either: partial, full
tmp_revisions
:
Dict
[
str
,
List
]
=
{}
snapshot
=
None
try
:
# Prepare origin and origin_visit
...
...
@@ -193,9 +194,15 @@ class PackageLoader:
for
a_filename
,
a_uri
,
a_metadata
in
self
.
get_artifacts
(
version
):
with
tempfile
.
TemporaryDirectory
()
as
tmpdir
:
# a_c_: archive_computed_
a_path
,
a_c_metadata
=
self
.
fetch_artifact_archive
(
a_uri
,
dest
=
tmpdir
)
try
:
# a_c_: archive_computed_
a_path
,
a_c_metadata
=
self
.
fetch_artifact_archive
(
a_uri
,
dest
=
tmpdir
)
except
Exception
as
e
:
logger
.
warning
(
'
Unable to retrieve %s. Reason: %s
'
,
a_uri
,
e
)
status_visit
=
'
partial
'
continue
logger
.
debug
(
'
archive_path: %s
'
,
a_path
)
logger
.
debug
(
'
archive_computed_metadata: %s
'
,
...
...
@@ -275,15 +282,15 @@ class PackageLoader:
'
target_type
'
:
'
revision
'
,
'
target
'
:
x
[
'
target
'
],
}
snapshot
=
{
'
branches
'
:
branches
}
snapshot
[
'
id
'
]
=
identifier_to_bytes
(
snapshot
_
identifier
(
snapshot
))
self
.
storage
.
snapshot_add
([
snapshot
]
)
if
branches
:
snapshot
=
{
'
branches
'
:
branches
}
snapshot
[
'
id
'
]
=
identifier
_to_bytes
(
snapshot_identifier
(
snapshot
)
)
# come so far, we actually reached a full visit
status_visit
=
'
full
'
logger
.
debug
(
'
snapshot: %s
'
,
snapshot
)
self
.
storage
.
snapshot_add
([
snapshot
])
# Update the visit's state
self
.
storage
.
origin_visit_update
(
...
...
@@ -291,5 +298,6 @@ class PackageLoader:
snapshot
=
snapshot
)
except
Exception
as
e
:
logger
.
warning
(
'
Fail to load %s. Reason: %s
'
%
(
self
.
url
,
e
))
status_visit
=
'
partial
'
finally
:
return
{
'
status
'
:
status_load
}
This diff is collapsed.
Click to expand it.
swh/loader/package/tests/conftest.py
+
4
−
2
View file @
939c0f54
...
...
@@ -66,10 +66,12 @@ def get_response_cb(request, context, ignore_urls=[]):
"""
logger
.
debug
(
'
get_response_cb(%s, %s)
'
,
request
,
context
)
url
=
urlparse
(
request
.
url
)
if
url
in
ignore_urls
:
logger
.
debug
(
'
url: %s
'
,
request
.
url
)
logger
.
debug
(
'
ignore_urls: %s
'
,
ignore_urls
)
if
request
.
url
in
ignore_urls
:
context
.
status_code
=
404
return
None
url
=
urlparse
(
request
.
url
)
dirname
=
url
.
hostname
# pypi.org | files.pythonhosted.org
# url.path: pypi/<project>/json -> local file: pypi_<project>_json
filename
=
url
.
path
[
1
:]
...
...
This diff is collapsed.
Click to expand it.
swh/loader/package/tests/test_pypi.py
+
77
−
3
View file @
939c0f54
...
...
@@ -17,6 +17,8 @@ from swh.loader.package.pypi import (
from
swh.loader.package.tests.common
import
DATADIR
,
check_snapshot
from
swh.loader.package.tests.conftest
import
local_get_factory
def
test_author_basic
():
data
=
{
...
...
@@ -202,16 +204,18 @@ def test_sdist_parse_failures(tmp_path):
# "edge" cases (for the same origin) #
# no release artifact:
# {visit full, status: uneventful, no contents, etc...}
def
test_no_release_artifact
(
requests_mock
):
pass
# no release artifact:
# {visit full, status: uneventful, no contents, etc...}
# problem during loading:
# {visit: partial, status: uneventful, no snapshot}
# problem during loading: failure early enough in between swh contents...
# some contents (contents, directories, etc...) have been written in storage
# {visit: partial, status: eventful, no snapshot}
...
...
@@ -222,6 +226,76 @@ def test_no_release_artifact(requests_mock):
# "normal" cases (for the same origin) #
local_get_missing
=
local_get_factory
(
ignore_urls
=
[
'
https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip
'
,
# noqa
])
# some missing release artifacts:
# {visit partial, status: eventful, 1 snapshot}
def
test_release_with_missing_artifact
(
swh_config
,
local_get_missing
):
"""
Load a pypi project with some missing artifacts ends up with 1 snapshot
"""
loader
=
PyPILoader
(
'
https://pypi.org/project/0805nexter
'
)
actual_load_status
=
loader
.
load
()
assert
actual_load_status
==
{
'
status
'
:
'
eventful
'
}
stats
=
loader
.
storage
.
stat_counters
()
assert
{
'
content
'
:
3
,
'
directory
'
:
2
,
'
origin
'
:
1
,
'
origin_visit
'
:
1
,
'
person
'
:
1
,
'
release
'
:
0
,
'
revision
'
:
1
,
'
skipped_content
'
:
0
,
'
snapshot
'
:
1
}
==
stats
expected_contents
=
map
(
hash_to_bytes
,
[
'
405859113963cb7a797642b45f171d6360425d16
'
,
'
e5686aa568fdb1d19d7f1329267082fe40482d31
'
,
'
83ecf6ec1114fd260ca7a833a2d165e71258c338
'
,
])
assert
list
(
loader
.
storage
.
content_missing_per_sha1
(
expected_contents
))
\
==
[]
expected_dirs
=
map
(
hash_to_bytes
,
[
'
b178b66bd22383d5f16f4f5c923d39ca798861b4
'
,
'
c3a58f8b57433a4b56caaa5033ae2e0931405338
'
,
])
assert
list
(
loader
.
storage
.
directory_missing
(
expected_dirs
))
==
[]
# {revision hash: directory hash}
expected_revs
=
{
hash_to_bytes
(
'
e445da4da22b31bfebb6ffc4383dbf839a074d21
'
):
hash_to_bytes
(
'
b178b66bd22383d5f16f4f5c923d39ca798861b4
'
),
# noqa
}
assert
list
(
loader
.
storage
.
revision_missing
(
expected_revs
))
==
[]
expected_branches
=
{
'
releases/1.2.0
'
:
{
'
target
'
:
'
e445da4da22b31bfebb6ffc4383dbf839a074d21
'
,
'
target_type
'
:
'
revision
'
,
},
'
HEAD
'
:
{
'
target
'
:
'
releases/1.2.0
'
,
'
target_type
'
:
'
alias
'
,
},
}
check_snapshot
(
'
dd0e4201a232b1c104433741dbf45895b8ac9355
'
,
expected_branches
,
storage
=
loader
.
storage
)
def
test_release_artifact_no_prior_visit
(
swh_config
,
local_get
):
"""
With no prior visit, load a pypi project ends up with 1 snapshot
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment