Commit 15389dca authored by vlorentz

HgLoaderFromDisk: Stop reading/writing Revision.metadata

It already writes this metadata with raw_extrinsic_metadata_add/extid_add,
and reads it back with extid_get_*.

This code was only kept for compatibility while the extids were being
migrated. That migration is now done, so the code is useless.
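
For reference, the ExtID round-trip that replaces Revision.metadata["node"] looks roughly like the sketch below. It is not code from this commit: the hash values and the "hg-nodeid" type string are illustrative placeholders (the loader uses its EXTID_TYPE constant), and the in-memory storage backend is assumed so the example is self-contained.

    from swh.model.identifiers import CoreSWHID, ObjectType
    from swh.model.model import ExtID
    from swh.storage import get_storage

    storage = get_storage(cls="memory")  # assumed backend, for illustration

    hg_nodeid = bytes.fromhex("1234567890" * 4)    # placeholder hg changeset id
    revision_id = bytes.fromhex("abcdef0123" * 4)  # placeholder revision sha1_git

    # Write side: record the hg nodeid as an ExtID targeting the revision's
    # SWHID (the loader does this through extid_add).
    swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=revision_id)
    storage.extid_add([ExtID(extid_type="hg-nodeid", extid=hg_nodeid, target=swhid)])

    # Read side: given a revision id, extid_get_from_target recovers the
    # nodeid, so Revision.metadata["node"] is no longer needed.
    extids = storage.extid_get_from_target(ObjectType.REVISION, [revision_id])
    assert [e.extid for e in extids] == [hg_nodeid]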
parent d35b2692
@@ -15,7 +15,7 @@ from swh.loader.core.utils import clean_dangling_folders
 from swh.loader.mercurial.utils import parse_visit_date
 from swh.model import identifiers
 from swh.model.from_disk import Content, DentryPerms, Directory
-from swh.model.hashutil import hash_to_bytehex, hash_to_bytes
+from swh.model.hashutil import hash_to_bytehex
 from swh.model.model import (
     ExtID,
     ObjectType,
@@ -206,18 +206,8 @@ class HgLoaderFromDisk(BaseLoader):
     def _set_latest_heads(self, latest_snapshot: Snapshot) -> None:
         """
-        Looks up the nodeid for all revisions in the snapshot, and adds them to
-        self._latest_heads.
-
-        This works in two steps:
-
-        1. Query the revisions with extid_get_from_target, to find nodeids from
-           revision ids, using the new ExtID architecture
-        2. For all revisions that were not found this way, fetch the revision
-           and look for the nodeid in its metadata.
-
-        This is a temporary process. When we are done migrating away from revision
-        metadata, step 2 will be removed.
+        Looks up the nodeid for all revisions in the snapshot via extid_get_from_target,
+        and adds them to self._latest_heads.
         """
         # TODO: add support for releases
         snapshot_branches = [
@@ -248,17 +238,6 @@
         # Add the found nodeids to self.latest_heads
         self._latest_heads.extend(extid.extid for extid in extids)

-        # For each revision without a nodeid, get the revision metadata
-        # to see if it is found there.
-        found_revisions = {extid.target.object_id for extid in extids if extid}
-        revisions_without_extid = list(set(snapshot_branches) - found_revisions)
-        self._latest_heads.extend(
-            hash_to_bytes(revision.metadata["node"])
-            for revision in self.storage.revision_get(revisions_without_extid)
-            if revision and revision.metadata
-        )
-
     def fetch_data(self) -> bool:
         """Fetch the data from the source the loader is currently loading
@@ -372,16 +351,14 @@
                     target=name, target_type=TargetType.ALIAS,
                 )

-            # TODO: do not write an ExtID if we got this branch from an ExtID that
-            # already exists.
-            # When we are done migrating away from revision metadata, this will
-            # be as simple as checking if the target is in self._latest_heads
-            revision_swhid = identifiers.CoreSWHID(
-                object_type=identifiers.ObjectType.REVISION, object_id=revision_sha1git
-            )
-            extids.append(
-                ExtID(extid_type=EXTID_TYPE, extid=hg_nodeid, target=revision_swhid)
-            )
+            if hg_nodeid not in self._latest_heads:
+                revision_swhid = identifiers.CoreSWHID(
+                    object_type=identifiers.ObjectType.REVISION,
+                    object_id=revision_sha1git,
+                )
+                extids.append(
+                    ExtID(extid_type=EXTID_TYPE, extid=hg_nodeid, target=revision_swhid)
+                )

         snapshot = Snapshot(branches=snapshot_branches)
         self.storage.snapshot_add([snapshot])
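
A note on the new guard: self._latest_heads is populated as a list, so "hg_nodeid not in self._latest_heads" is a linear scan per revision. Building a set once would keep the same semantics with O(1) lookups; a minimal illustration with made-up data:

    # Made-up data: list and set membership agree, but the set is O(1) per probe.
    heads_list = [i.to_bytes(20, "big") for i in range(10_000)]
    heads_set = set(heads_list)

    nodeid = (1234).to_bytes(20, "big")
    assert (nodeid not in heads_list) == (nodeid not in heads_set)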
@@ -486,7 +463,6 @@
             type=RevisionType.MERCURIAL,
             directory=root_sha1git,
             message=rev_ctx.description(),
-            metadata={"node": hg_nodeid.hex()},
             extra_headers=tuple(extra_headers),
             synthetic=False,
             parents=self.get_revision_parents(rev_ctx),
@@ -18,7 +18,7 @@ from swh.loader.tests import (
     prepare_repository_from_archive,
 )
 from swh.model.from_disk import Content, DentryPerms
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
 from swh.model.identifiers import ObjectType
 from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType
 from swh.storage import get_storage
@@ -242,7 +242,11 @@ def test_visit_repository_with_transplant_operations(swh_storage, datadir, tmp_p
     hg_changesets = set()
     transplant_sources = set()
     for rev in loader.storage.revision_log(revisions):
-        hg_changesets.add(rev["metadata"]["node"])
+        extids = list(
+            loader.storage.extid_get_from_target(ObjectType.REVISION, [rev["id"]])
+        )
+        assert len(extids) == 1
+        hg_changesets.add(hash_to_hex(extids[0].extid))
         for k, v in rev["extra_headers"]:
             if k == b"transplant_source":
                 transplant_sources.add(v.decode("ascii"))
@@ -250,7 +254,7 @@ def test_visit_repository_with_transplant_operations(swh_storage, datadir, tmp_p
     # check extracted data are valid
     assert len(hg_changesets) > 0
     assert len(transplant_sources) > 0
-    assert transplant_sources.issubset(hg_changesets)
+    assert transplant_sources <= hg_changesets


 def _partial_copy_storage(
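
A small aside on the assertion rewrite above: for set operands, "<=" is exactly the subset test, so the change is purely stylistic; the one behavioral difference is that issubset also accepts arbitrary iterables, while "<=" requires both sides to be sets. For instance:

    transplant_sources = {"a1b2", "c3d4"}
    hg_changesets = {"a1b2", "c3d4", "e5f6"}

    assert transplant_sources <= hg_changesets                # operator form
    assert transplant_sources.issubset(hg_changesets)         # method form
    assert transplant_sources.issubset(list(hg_changesets))   # method also takes iterables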
@@ -275,13 +279,6 @@ def _partial_copy_storage(
         ]
         new_storage.revision_add(revisions)
-    elif mechanism == "revision metadata":
-        assert (
-            copy_revisions
-        ), "copy_revisions must be True if mechanism='revision metadata'"
-        revisions = [rev for rev in old_storage.revision_get(heads) if rev]
-        new_storage.revision_add(revisions)
     else:
         assert mechanism == "same storage"
         return old_storage
@@ -297,12 +294,11 @@ def _partial_copy_storage(
     return new_storage


-@pytest.mark.parametrize("mechanism", ("extid", "revision metadata", "same storage"))
+@pytest.mark.parametrize("mechanism", ("extid", "same storage"))
 def test_load_unchanged_repo_should_be_uneventful(
     swh_storage, datadir, tmp_path, mechanism
 ):
-    """Checks the loader can find which revisions it already loaded, using either
-    ExtIDs or revision metadata."""
+    """Checks the loader can find which revisions it already loaded, using ExtIDs."""
     archive_name = "hello"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)