From 56c77f6cdaa093ab120f36043d3a0d7d19a15db3 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Thu, 19 Dec 2024 15:49:55 +0100
Subject: [PATCH] dumb: Add support for tree and blob refs

Previously, the dumb git loader only considered refs targeting commits
and tags, but refs can occasionally target blobs and trees as well.
Ensure such refs are supported too.

Fixes #4756.
---
 swh/loader/git/dumb.py              |  6 ++---
 swh/loader/git/tests/test_loader.py | 39 +++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/swh/loader/git/dumb.py b/swh/loader/git/dumb.py
index c5ba46ff..1e94f4a1 100644
--- a/swh/loader/git/dumb.py
+++ b/swh/loader/git/dumb.py
@@ -128,11 +128,9 @@ class GitObjectsFetcher:
         commit_objects = []
         for ref in wants:
             ref_object = self._get_git_object(ref)
-            if ref_object.type_num == Commit.type_num:
+            self.objects[ref_object.type_name].add(ref)
+            if ref_object.type_name == Commit.type_name:
                 commit_objects.append(cast(Commit, ref_object))
-                self.objects[b"commit"].add(ref)
-            else:
-                self.objects[b"tag"].add(ref)
 
         # perform DFS on commits graph
         while commit_objects:
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
index 4429df21..63168a89 100644
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -34,6 +34,7 @@ from swh.loader.tests import (
     get_stats,
     prepare_repository_from_archive,
 )
+from swh.model.hashutil import hash_to_bytes
 from swh.model.model import (
     MetadataAuthority,
     MetadataAuthorityType,
@@ -43,6 +44,8 @@ from swh.model.model import (
     OriginVisitStatus,
     RawExtrinsicMetadata,
     Snapshot,
+    SnapshotBranch,
+    SnapshotTargetType,
 )
 
 
@@ -974,6 +977,42 @@ class DumbGitLoaderTestBase(FullGitLoaderTests):
         assert b"HEAD" in self.loader.snapshot.branches
         assert self.loader.snapshot.branches[b"HEAD"].target == b"refs/heads/master"
 
+    def test_load_refs_targeting_tree_or_blob(self, mocker):
+        known_tree = "fbf70528223d263661b5ad4b80f26caf3860eb8e"
+        known_blob = "534d61ecee4f6da4d6ca6ddd8abf258208d2d1bc"
+        tree_ref = "refs/tree"
+        blob_ref = "refs/blob"
+
+        class GitObjectsFetcherTreeAndBlobRefs(dumb.GitObjectsFetcher):
+            def _http_get(self, path: str) -> SpooledTemporaryFile:
+                buffer = super()._http_get(path)
+                if path == "info/refs":
+                    # Add two refs targeting blob and tree in the refs list
+                    refs = buffer.read().decode("utf-8")
+                    buffer.seek(0)
+                    buffer.write(
+                        (
+                            f"{known_tree}\t{tree_ref}\n"
+                            f"{known_blob}\t{blob_ref}\n" + refs
+                        ).encode()
+                    )
+                    buffer.flush()
+                    buffer.seek(0)
+                return buffer
+
+        mocker.patch.object(dumb, "GitObjectsFetcher", GitObjectsFetcherTreeAndBlobRefs)
+
+        res = self.loader.load()
+        assert res == {"status": "eventful"}
+
+        assert self.loader.snapshot.branches[tree_ref.encode()] == SnapshotBranch(
+            target=hash_to_bytes(known_tree), target_type=SnapshotTargetType.DIRECTORY
+        )
+
+        assert self.loader.snapshot.branches[blob_ref.encode()] == SnapshotBranch(
+            target=hash_to_bytes(known_blob), target_type=SnapshotTargetType.CONTENT
+        )
+
 
 class TestDumbGitLoaderWithPack(DumbGitLoaderTestBase):
     @classmethod
-- 
GitLab