Skip to content
Snippets Groups Projects
Verified Commit 939c0f54 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

pypi: Add support for loading repositories with missing artifacts

parent f382f277
No related branches found
No related tags found
No related merge requests found
......@@ -168,8 +168,9 @@ class PackageLoader:
"""
status_load = 'uneventful' # either: eventful, uneventful, failed
status_visit = 'partial' # either: partial, full
status_visit = 'full' # either: partial, full
tmp_revisions: Dict[str, List] = {}
snapshot = None
try:
# Prepare origin and origin_visit
......@@ -193,9 +194,15 @@ class PackageLoader:
for a_filename, a_uri, a_metadata in self.get_artifacts(
version):
with tempfile.TemporaryDirectory() as tmpdir:
# a_c_: archive_computed_
a_path, a_c_metadata = self.fetch_artifact_archive(
a_uri, dest=tmpdir)
try:
# a_c_: archive_computed_
a_path, a_c_metadata = self.fetch_artifact_archive(
a_uri, dest=tmpdir)
except Exception as e:
logger.warning('Unable to retrieve %s. Reason: %s',
a_uri, e)
status_visit = 'partial'
continue
logger.debug('archive_path: %s', a_path)
logger.debug('archive_computed_metadata: %s',
......@@ -275,15 +282,15 @@ class PackageLoader:
'target_type': 'revision',
'target': x['target'],
}
snapshot = {
'branches': branches
}
snapshot['id'] = identifier_to_bytes(
snapshot_identifier(snapshot))
self.storage.snapshot_add([snapshot])
if branches:
snapshot = {
'branches': branches
}
snapshot['id'] = identifier_to_bytes(
snapshot_identifier(snapshot))
# come so far, we actually reached a full visit
status_visit = 'full'
logger.debug('snapshot: %s', snapshot)
self.storage.snapshot_add([snapshot])
# Update the visit's state
self.storage.origin_visit_update(
......@@ -291,5 +298,6 @@ class PackageLoader:
snapshot=snapshot)
except Exception as e:
logger.warning('Fail to load %s. Reason: %s' % (self.url, e))
status_visit = 'partial'
finally:
return {'status': status_load}
......@@ -66,10 +66,12 @@ def get_response_cb(request, context, ignore_urls=[]):
"""
logger.debug('get_response_cb(%s, %s)', request, context)
url = urlparse(request.url)
if url in ignore_urls:
logger.debug('url: %s', request.url)
logger.debug('ignore_urls: %s', ignore_urls)
if request.url in ignore_urls:
context.status_code = 404
return None
url = urlparse(request.url)
dirname = url.hostname # pypi.org | files.pythonhosted.org
# url.path: pypi/<project>/json -> local file: pypi_<project>_json
filename = url.path[1:]
......
......@@ -17,6 +17,8 @@ from swh.loader.package.pypi import (
from swh.loader.package.tests.common import DATADIR, check_snapshot
from swh.loader.package.tests.conftest import local_get_factory
def test_author_basic():
data = {
......@@ -202,16 +204,18 @@ def test_sdist_parse_failures(tmp_path):
# "edge" cases (for the same origin) #
# no release artifact:
# {visit full, status: uneventful, no contents, etc...}
def test_no_release_artifact(requests_mock):
pass
# no release artifact:
# {visit full, status: uneventful, no contents, etc...}
# problem during loading:
# {visit: partial, status: uneventful, no snapshot}
# problem during loading: failure early enough in between swh contents...
# some contents (contents, directories, etc...) have been written in storage
# {visit: partial, status: eventful, no snapshot}
......@@ -222,6 +226,76 @@ def test_no_release_artifact(requests_mock):
# "normal" cases (for the same origin) #
# Variant of the local HTTP fixture built with `local_get_factory`, with the
# 0805nexter-1.1.0.zip archive URL listed in `ignore_urls`.
# NOTE(review): presumably requests to an ignored URL are answered with a 404
# by the mocked layer (see `get_response_cb`), simulating a missing artifact
# -- confirm in conftest.
local_get_missing = local_get_factory(ignore_urls=[
'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa
])
# some missing release artifacts:
# {visit partial, status: eventful, 1 snapshot}
def test_release_with_missing_artifact(swh_config, local_get_missing):
    """Load a PyPI project with a missing artifact; expect one snapshot.

    The load must report ``eventful`` and the storage must hold the
    objects of the single release that was ingested (``releases/1.2.0``),
    with ``HEAD`` aliasing that release branch.

    """
    release_dir_id = 'b178b66bd22383d5f16f4f5c923d39ca798861b4'
    release_rev_id = 'e445da4da22b31bfebb6ffc4383dbf839a074d21'

    loader = PyPILoader('https://pypi.org/project/0805nexter')
    assert loader.load() == {'status': 'eventful'}

    storage = loader.storage

    # Object counts written by the load
    expected_counters = {
        'content': 3,
        'directory': 2,
        'origin': 1,
        'origin_visit': 1,
        'person': 1,
        'release': 0,
        'revision': 1,
        'skipped_content': 0,
        'snapshot': 1
    }
    assert storage.stat_counters() == expected_counters

    # None of the expected contents may be reported missing
    content_ids = [hash_to_bytes(sha1) for sha1 in (
        '405859113963cb7a797642b45f171d6360425d16',
        'e5686aa568fdb1d19d7f1329267082fe40482d31',
        '83ecf6ec1114fd260ca7a833a2d165e71258c338',
    )]
    assert list(storage.content_missing_per_sha1(content_ids)) == []

    # Same check for the directories
    directory_ids = [hash_to_bytes(dir_id) for dir_id in (
        release_dir_id,
        'c3a58f8b57433a4b56caaa5033ae2e0931405338',
    )]
    assert list(storage.directory_missing(directory_ids)) == []

    # {revision hash: directory hash}
    revision_to_directory = {
        hash_to_bytes(release_rev_id): hash_to_bytes(release_dir_id),
    }
    assert list(storage.revision_missing(revision_to_directory)) == []

    # The snapshot only references the release that could be loaded
    branches = {
        'releases/1.2.0': {
            'target': release_rev_id,
            'target_type': 'revision',
        },
        'HEAD': {
            'target': 'releases/1.2.0',
            'target_type': 'alias',
        },
    }
    check_snapshot(
        'dd0e4201a232b1c104433741dbf45895b8ac9355',
        branches,
        storage=storage)
def test_release_artifact_no_prior_visit(swh_config, local_get):
"""With no prior visit, load a pypi project ends up with 1 snapshot
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment