Commit 15389dca authored by vlorentz

HgLoaderFromDisk: Stop reading/writing Revision.metadata

It already writes this metadata with raw_extrinsic_metadata_add/extid_add,
and reads it back with extid_get_*.

This code was only kept for compatibility while the extids were being
migrated. That migration is now done, so the code is useless.
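
For reference, the ExtID round-trip that replaces Revision.metadata["node"] looks roughly like the sketch below. It is not code from this commit: the hash values and the "hg-nodeid" type string are illustrative placeholders (the loader uses its EXTID_TYPE constant), and the in-memory storage backend is assumed so the example is self-contained.

    from swh.model.identifiers import CoreSWHID, ObjectType
    from swh.model.model import ExtID
    from swh.storage import get_storage

    storage = get_storage(cls="memory")  # assumed backend, for illustration

    hg_nodeid = bytes.fromhex("1234567890" * 4)    # placeholder hg changeset id
    revision_id = bytes.fromhex("abcdef0123" * 4)  # placeholder revision sha1_git

    # Write side: record the hg nodeid as an ExtID targeting the revision's
    # SWHID (the loader does this through extid_add).
    swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=revision_id)
    storage.extid_add([ExtID(extid_type="hg-nodeid", extid=hg_nodeid, target=swhid)])

    # Read side: given a revision id, extid_get_from_target recovers the
    # nodeid, so Revision.metadata["node"] is no longer needed.
    extids = storage.extid_get_from_target(ObjectType.REVISION, [revision_id])
    assert [e.extid for e in extids] == [hg_nodeid]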
parent d35b2692
@@ -15,7 +15,7 @@ from swh.loader.core.utils import clean_dangling_folders
 from swh.loader.mercurial.utils import parse_visit_date
 from swh.model import identifiers
 from swh.model.from_disk import Content, DentryPerms, Directory
-from swh.model.hashutil import hash_to_bytehex, hash_to_bytes
+from swh.model.hashutil import hash_to_bytehex
 from swh.model.model import (
     ExtID,
     ObjectType,
@@ -206,18 +206,8 @@ class HgLoaderFromDisk(BaseLoader):
     def _set_latest_heads(self, latest_snapshot: Snapshot) -> None:
         """
-        Looks up the nodeid for all revisions in the snapshot, and adds them to
-        self._latest_heads.
-
-        This works in two steps:
-
-        1. Query the revisions with extid_get_from_target, to find nodeids from
-           revision ids, using the new ExtID architecture
-        2. For all revisions that were not found this way, fetch the revision
-           and look for the nodeid in its metadata.
-
-        This is a temporary process. When we are done migrating away from revision
-        metadata, step 2 will be removed.
+        Looks up the nodeid for all revisions in the snapshot via extid_get_from_target,
+        and adds them to self._latest_heads.
         """
         # TODO: add support for releases
         snapshot_branches = [
@@ -248,17 +238,6 @@
         # Add the found nodeids to self.latest_heads
         self._latest_heads.extend(extid.extid for extid in extids)

-        # For each revision without a nodeid, get the revision metadata
-        # to see if it is found there.
-        found_revisions = {extid.target.object_id for extid in extids if extid}
-        revisions_without_extid = list(set(snapshot_branches) - found_revisions)
-        self._latest_heads.extend(
-            hash_to_bytes(revision.metadata["node"])
-            for revision in self.storage.revision_get(revisions_without_extid)
-            if revision and revision.metadata
-        )
-
     def fetch_data(self) -> bool:
         """Fetch the data from the source the loader is currently loading
@@ -372,16 +351,14 @@
                     target=name, target_type=TargetType.ALIAS,
                 )

-            # TODO: do not write an ExtID if we got this branch from an ExtID that
-            # already exists.
-            # When we are done migrating away from revision metadata, this will
-            # be as simple as checking if the target is in self._latest_heads
-            revision_swhid = identifiers.CoreSWHID(
-                object_type=identifiers.ObjectType.REVISION, object_id=revision_sha1git
-            )
-            extids.append(
-                ExtID(extid_type=EXTID_TYPE, extid=hg_nodeid, target=revision_swhid)
-            )
+            if hg_nodeid not in self._latest_heads:
+                revision_swhid = identifiers.CoreSWHID(
+                    object_type=identifiers.ObjectType.REVISION,
+                    object_id=revision_sha1git,
+                )
+                extids.append(
+                    ExtID(extid_type=EXTID_TYPE, extid=hg_nodeid, target=revision_swhid)
+                )

         snapshot = Snapshot(branches=snapshot_branches)
         self.storage.snapshot_add([snapshot])
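
A note on the new guard: self._latest_heads is populated as a list, so "hg_nodeid not in self._latest_heads" is a linear scan per revision. Building a set once would keep the same semantics with O(1) lookups; a minimal illustration with made-up data:

    # Made-up data: list and set membership agree, but the set is O(1) per probe.
    heads_list = [i.to_bytes(20, "big") for i in range(10_000)]
    heads_set = set(heads_list)

    nodeid = (1234).to_bytes(20, "big")
    assert (nodeid not in heads_list) == (nodeid not in heads_set)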
@@ -486,7 +463,6 @@
             type=RevisionType.MERCURIAL,
             directory=root_sha1git,
             message=rev_ctx.description(),
-            metadata={"node": hg_nodeid.hex()},
             extra_headers=tuple(extra_headers),
             synthetic=False,
             parents=self.get_revision_parents(rev_ctx),
@@ -18,7 +18,7 @@ from swh.loader.tests import (
     prepare_repository_from_archive,
 )
 from swh.model.from_disk import Content, DentryPerms
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
 from swh.model.identifiers import ObjectType
 from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType
 from swh.storage import get_storage
@@ -242,7 +242,11 @@ def test_visit_repository_with_transplant_operations(swh_storage, datadir, tmp_p
     hg_changesets = set()
     transplant_sources = set()
     for rev in loader.storage.revision_log(revisions):
-        hg_changesets.add(rev["metadata"]["node"])
+        extids = list(
+            loader.storage.extid_get_from_target(ObjectType.REVISION, [rev["id"]])
+        )
+        assert len(extids) == 1
+        hg_changesets.add(hash_to_hex(extids[0].extid))
         for k, v in rev["extra_headers"]:
             if k == b"transplant_source":
                 transplant_sources.add(v.decode("ascii"))
@@ -250,7 +254,7 @@ def test_visit_repository_with_transplant_operations(swh_storage, datadir, tmp_p
     # check extracted data are valid
     assert len(hg_changesets) > 0
     assert len(transplant_sources) > 0
-    assert transplant_sources.issubset(hg_changesets)
+    assert transplant_sources <= hg_changesets


 def _partial_copy_storage(
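
A small aside on the assertion rewrite above: for set operands, "<=" is exactly the subset test, so the change is purely stylistic; the one behavioral difference is that issubset also accepts arbitrary iterables, while "<=" requires both sides to be sets. For instance:

    transplant_sources = {"a1b2", "c3d4"}
    hg_changesets = {"a1b2", "c3d4", "e5f6"}

    assert transplant_sources <= hg_changesets                # operator form
    assert transplant_sources.issubset(hg_changesets)         # method form
    assert transplant_sources.issubset(list(hg_changesets))   # method also takes iterables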
@@ -275,13 +279,6 @@ def _partial_copy_storage(
         ]
         new_storage.revision_add(revisions)
-    elif mechanism == "revision metadata":
-        assert (
-            copy_revisions
-        ), "copy_revisions must be True if mechanism='revision metadata'"
-        revisions = [rev for rev in old_storage.revision_get(heads) if rev]
-        new_storage.revision_add(revisions)
     else:
         assert mechanism == "same storage"
         return old_storage
@@ -297,12 +294,11 @@ def _partial_copy_storage(
     return new_storage


-@pytest.mark.parametrize("mechanism", ("extid", "revision metadata", "same storage"))
+@pytest.mark.parametrize("mechanism", ("extid", "same storage"))
 def test_load_unchanged_repo_should_be_uneventful(
     swh_storage, datadir, tmp_path, mechanism
 ):
-    """Checks the loader can find which revisions it already loaded, using either
-    ExtIDs or revision metadata."""
+    """Checks the loader can find which revisions it already loaded, using ExtIDs."""
     archive_name = "hello"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)