Skip to content
Snippets Groups Projects
Commit 584777f3 authored by Antoine Lambert's avatar Antoine Lambert
Browse files

package/archive: Add snapshot_append parameter to ArchiveLoader

It makes it possible to append the latest snapshot content of an origin each
time the loader is invoked.

The purpose is to keep track of all the origin artifacts loaded so
far in each new visit of the origin.

Closes T3347
parent 0e4bb4bb
No related branches found
Tags v0.22.1
1 merge request!212package/archive: Add snapshot_append parameter to ArchiveLoader
......@@ -8,7 +8,7 @@ import hashlib
import logging
from os import path
import string
from typing import Any, Dict, Iterator, Optional, Sequence, Tuple, Union
from typing import Any, Dict, Iterator, Mapping, Optional, Sequence, Tuple, Union
import attr
import iso8601
......@@ -84,6 +84,7 @@ class ArchiveLoader(PackageLoader[ArchivePackageInfo]):
artifacts: Sequence[Dict[str, Any]],
extid_manifest_format: Optional[str] = None,
max_content_size: Optional[int] = None,
snapshot_append: bool = False,
):
f"""Loader constructor.
......@@ -107,6 +108,8 @@ class ArchiveLoader(PackageLoader[ArchivePackageInfo]):
extid_manifest_format: template string used to format a manifest,
which is hashed to get the extid of a package.
Defaults to {ArchivePackageInfo.MANIFEST_FORMAT!r}
snapshot_append: if :const:`True`, append latest snapshot content to
the new snapshot created by the loader
"""
super().__init__(storage=storage, url=url, max_content_size=max_content_size)
......@@ -116,6 +119,7 @@ class ArchiveLoader(PackageLoader[ArchivePackageInfo]):
if extid_manifest_format is None
else string.Template(extid_manifest_format)
)
self.snapshot_append = snapshot_append
def get_versions(self) -> Sequence[str]:
versions = []
......@@ -164,3 +168,9 @@ class ArchiveLoader(PackageLoader[ArchivePackageInfo]):
directory=directory,
synthetic=True,
)
def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]:
    """Return the branches of the origin's latest snapshot so they are
    carried over into the snapshot created by the current visit.

    Only active when the loader was instantiated with
    ``snapshot_append=True``; otherwise, or when no previous snapshot
    exists, an empty mapping is returned and the new snapshot contains
    only the freshly loaded branches.
    """
    if self.snapshot_append:
        previous_snapshot = self.last_snapshot()
        if previous_snapshot:
            return previous_snapshot.to_dict()["branches"]
    return {}
......@@ -9,7 +9,9 @@ from swh.loader.package.archive.loader import ArchiveLoader
@shared_task(name=__name__ + ".LoadArchive")
def load_archive_files(*, url=None, artifacts=None, snapshot_append=False):
    """Load archive's artifacts (e.g gnu, etc...)

    Args:
        url: origin URL of the archive project
        artifacts: sequence of artifact description dicts to load
        snapshot_append: if True, append the latest snapshot content to
            the new snapshot created by the loader

    Returns:
        The result dict of the loader's load() call.
    """
    loader = ArchiveLoader.from_configfile(
        url=url, artifacts=artifacts, snapshot_append=snapshot_append
    )
    return loader.load()
......@@ -22,7 +22,14 @@ GNU_ARTIFACTS = [
"length": 221837,
"filename": "8sync-0.1.0.tar.gz",
"version": "0.1.0",
}
},
{
"time": 1480991830,
"url": "https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz",
"length": 238466,
"filename": "8sync-0.2.0.tar.gz",
"version": "0.2.0",
},
]
_expected_new_contents_first_visit = [
......@@ -115,7 +122,7 @@ def test_archive_visit_with_release_artifact_no_prior_visit(
"""With no prior visit, load a gnu project ends up with 1 snapshot
"""
loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS)
loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS[:1])
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
......@@ -173,7 +180,7 @@ def test_archive_2_visits_without_change(swh_storage, requests_mock_datadir):
"""
url = URL
loader = ArchiveLoader(swh_storage, url, artifacts=GNU_ARTIFACTS)
loader = ArchiveLoader(swh_storage, url, artifacts=GNU_ARTIFACTS[:1])
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
......@@ -229,13 +236,7 @@ def test_archive_2_visits_with_new_artifact(swh_storage, requests_mock_datadir):
]
assert len(urls) == 1
artifact2 = {
"time": 1480991830,
"url": "https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz",
"length": 238466,
"filename": "8sync-0.2.0.tar.gz",
"version": "0.2.0",
}
artifact2 = GNU_ARTIFACTS[1]
loader2 = ArchiveLoader(swh_storage, url, [artifact1, artifact2])
stats2 = get_stats(swh_storage)
......@@ -341,3 +342,77 @@ def test_archive_extid():
with pytest.raises(KeyError):
p_info.extid(manifest_format=string.Template("$a $unknown_key"))
def test_archive_snapshot_append(swh_storage, requests_mock_datadir):
    """Two successive visits with snapshot_append must accumulate the
    release branches of both visits in the latest snapshot."""

    def load_and_check(artifact):
        # run one visit for a single artifact and return the resulting snapshot
        loader = ArchiveLoader(swh_storage, URL, [artifact], snapshot_append=True)
        load_status = loader.load()
        assert load_status["status"] == "eventful"
        assert load_status["snapshot_id"] is not None
        assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
        return loader.last_snapshot()

    # first loading with a first artifact
    first_artifact = GNU_ARTIFACTS[0]
    first_branch = f"releases/{first_artifact['version']}".encode()
    snapshot = load_and_check(first_artifact)
    assert len(snapshot.branches) == 2
    assert b"HEAD" in snapshot.branches
    assert first_branch in snapshot.branches
    assert snapshot.branches[b"HEAD"].target == first_branch

    # second loading with a second artifact: the snapshot should contain a
    # new branch plus the branch created by the first visit
    second_artifact = GNU_ARTIFACTS[1]
    second_branch = f"releases/{second_artifact['version']}".encode()
    snapshot = load_and_check(second_artifact)
    assert len(snapshot.branches) == 3
    assert b"HEAD" in snapshot.branches
    assert second_branch in snapshot.branches
    assert first_branch in snapshot.branches
    assert snapshot.branches[b"HEAD"].target == second_branch
def test_archive_snapshot_append_branch_override(swh_storage, requests_mock_datadir):
    """Re-visiting the same version with a different tarball must override
    the existing release branch target when snapshot_append is enabled."""
    first_artifact = GNU_ARTIFACTS[0]
    branch_name = f"releases/{first_artifact['version']}".encode()

    # first loading for a first artifact
    loader = ArchiveLoader(swh_storage, URL, [first_artifact], snapshot_append=True)
    load_status = loader.load()
    assert load_status["status"] == "eventful"
    assert load_status["snapshot_id"] is not None
    assert_last_visit_matches(swh_storage, URL, status="full", type="tar")

    # check expected snapshot and remember the branch target
    snapshot = loader.last_snapshot()
    assert len(snapshot.branches) == 2
    assert branch_name in snapshot.branches
    first_target = snapshot.branches[branch_name].target

    # second loading for an artifact with the same version as the first one
    # but with different tarball content (url/time/length taken from the
    # second artifact)
    overriding_artifact = dict(first_artifact)
    overriding_artifact["url"] = GNU_ARTIFACTS[1]["url"]
    overriding_artifact["time"] = GNU_ARTIFACTS[1]["time"]
    overriding_artifact["length"] = GNU_ARTIFACTS[1]["length"]
    loader = ArchiveLoader(swh_storage, URL, [overriding_artifact], snapshot_append=True)
    load_status = loader.load()
    assert load_status["status"] == "eventful"
    assert load_status["snapshot_id"] is not None
    assert_last_visit_matches(swh_storage, URL, status="full", type="tar")

    # snapshot should contain the same branch as previously, but pointing
    # to a different target
    snapshot = loader.last_snapshot()
    assert len(snapshot.branches) == 2
    assert branch_name in snapshot.branches
    assert snapshot.branches[branch_name].target != first_target
......@@ -19,3 +19,20 @@ def test_tasks_archive_loader(
assert res.successful()
assert mock_load.called
assert res.result == {"status": "eventful"}
def test_tasks_archive_loader_snapshot_append(
    mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
):
    """The LoadArchive task must accept and forward the snapshot_append
    parameter and report the loader's result."""
    load_mock = mocker.patch("swh.loader.package.archive.loader.ArchiveLoader.load")
    load_mock.return_value = {"status": "eventful"}

    task_result = swh_scheduler_celery_app.send_task(
        "swh.loader.package.archive.tasks.LoadArchive",
        kwargs=dict(url="https://gnu.org/", artifacts=[], snapshot_append=True),
    )
    assert task_result
    task_result.wait()

    assert task_result.successful()
    assert load_mock.called
    assert task_result.result == {"status": "eventful"}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment