Skip to content
Snippets Groups Projects
Commit e48ced02 authored by vlorentz's avatar vlorentz
Browse files

package.loader: Write to the ExtID storage

This allows future runs of a loader to know a package was already
loaded, without querying each of the revisions individually and
parsing their metadata.

Eventually, this will allow us to get rid of the 'metadata' column
on the 'revision' table entirely.
parent a32f6871
No related branches found
No related tags found
1 merge request: !200 "package.loader: Write to the ExtID storage"
......@@ -2,4 +2,4 @@ swh.core >= 0.3
swh.model >= 1.0.0
swh.objstorage >= 0.2.2
swh.scheduler >= 0.4.0
swh.storage >= 0.13.1
swh.storage >= 0.27.0
......@@ -44,6 +44,7 @@ from swh.model.identifiers import (
ObjectType,
)
from swh.model.model import (
ExtID,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
......@@ -588,6 +589,7 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]):
branch.target for branch in last_snapshot.branches.values()
}
new_extids: Set[ExtID] = set()
tmp_revisions: Dict[str, List[Tuple[str, Sha1Git]]] = {
version: [] for version in versions
}
......@@ -633,6 +635,16 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]):
if revision_id is None:
continue
partial_extid = p_info.extid()
if partial_extid is not None:
(extid_type, extid) = partial_extid
revision_swhid = CoreSWHID(
object_type=ObjectType.REVISION, object_id=revision_id
)
new_extids.add(
ExtID(extid_type=extid_type, extid=extid, target=revision_swhid)
)
tmp_revisions[version].append((branch_name, revision_id))
if load_exceptions:
......@@ -689,6 +701,8 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]):
status_visit = "partial"
status_load = "failed"
self._load_extids(new_extids)
return self.finalize_visit(
snapshot=snapshot,
visit=visit,
......@@ -1001,3 +1015,14 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]):
}
if fetchers:
self.storage.metadata_fetcher_add(list(deduplicated_fetchers.values()))
def _load_extids(self, extids: Set[ExtID]) -> None:
    """Write the given ExtIDs to the storage on a best-effort basis.

    Nothing is sent to the storage when ``extids`` is empty. Any exception
    raised while adding them is logged and reported to Sentry instead of
    being propagated to the caller.
    """
    if extids:
        try:
            self.storage.extid_add(list(extids))
        except Exception as exc:
            # Deliberately not re-raised: a failed write only means the next
            # visit will load the same versions again.
            logger.exception("Failed to load new ExtIDs for %s", self.url)
            sentry_sdk.capture_exception(exc)
......@@ -697,10 +697,10 @@ def test_load_nixguix_one_common_artifact_from_other_loader(
expected_detections = [
{"reason": "'integrity'", "known_artifact": old_revision.metadata,},
{"reason": "'integrity'", "known_artifact": old_revision.metadata,},
]
# as many calls as there are sources listed in the sources.json
assert len(expected_detections) == len(all_sources["sources"])
# no more calls than there are sources listed in the sources.json,
# as some of them may be skipped using the ExtID from a previous run
assert len(expected_detections) <= len(all_sources["sources"])
assert actual_detections == expected_detections
......@@ -202,8 +202,9 @@ def test_load_get_known_extids() -> None:
)
def test_load_skip_extids() -> None:
"""Checks PackageLoader.load() skips iff it should."""
def test_load_extids() -> None:
"""Checks PackageLoader.load() skips iff it should, and writes (only)
the new ExtIDs"""
storage = get_storage("memory")
origin = "http://example.org"
......@@ -295,6 +296,26 @@ def test_load_skip_extids() -> None:
)
assert snapshot_get_latest(storage, origin) == snapshot
extids = storage.extid_get_from_target(
ObjectType.REVISION,
[
rev1_swhid.object_id,
rev2_swhid.object_id,
rev3_swhid.object_id,
rev4_swhid.object_id,
],
)
assert set(extids) == {
# What we inserted at the beginning of the test:
ExtID("extid-type1", b"extid-of-v1.0", rev1_swhid),
ExtID("extid-type2", b"extid-of-v2.0", rev2_swhid),
# Added by the loader:
ExtID("extid-type1", b"extid-of-v2.0", rev4_swhid),
ExtID("extid-type2", b"extid-of-v3.0", rev4_swhid),
ExtID("extid-type2", b"extid-of-v4.0", rev4_swhid),
}
def test_manifest_extid():
"""Compute primary key should return the right identity
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment