From f814e1179d076c14cd5a75dfdf205fbfc5cd2cc4 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Mon, 8 Jan 2024 15:56:16 +0100
Subject: [PATCH] nixguix: Exploit new submodule info in sources.json from Guix

Guix now provides a "submodule" info in the sources.jon file it
produced so exploit it to set the new "submodules" parameter of
the git-checkout loader in order to retrieve submodules only when
it is required.

Related to swh/devel/swh-loader-git#4751.
---
 swh/lister/nixguix/lister.py                  |  8 +++++-
 .../nixguix/tests/data/sources-success.json   | 27 +++++++++++++++----
 swh/lister/nixguix/tests/test_lister.py       | 13 +++++++--
 3 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py
index 8e1a75e5..09000ae5 100644
--- a/swh/lister/nixguix/lister.py
+++ b/swh/lister/nixguix/lister.py
@@ -111,6 +111,8 @@ class Artifact:
     """Checksum layout mode to provide to loaders (e.g. nar, standard, ...)"""
     ref: Optional[str]
     """Optional reference on the artifact (git commit, branch, svn commit, tag, ...)"""
+    submodules: bool
+    """Indicates if submodules should be retrieved for a git-checkout visit type"""
 
 
 @dataclass
@@ -472,6 +474,7 @@ class NixGuixLister(StatelessLister[PageResult]):
                         checksum_layout=MAPPING_CHECKSUM_LAYOUT[outputHashMode],
                         visit_type=VCS_ARTIFACT_TYPE_TO_VISIT_TYPE[artifact_type],
                         ref=plain_ref,
+                        submodules=artifact.get("submodule", False),
                     )
 
             elif artifact_type == "url":
@@ -605,6 +608,7 @@ class NixGuixLister(StatelessLister[PageResult]):
                     checksum_layout=MAPPING_CHECKSUM_LAYOUT[outputHashMode],
                     visit_type="tarball-directory" if is_tar else "content",
                     ref=None,
+                    submodules=False,
                 )
             else:
                 logger.warning(
@@ -626,13 +630,15 @@ class NixGuixLister(StatelessLister[PageResult]):
     def artifact_to_listed_origin(self, artifact: Artifact) -> Iterator[ListedOrigin]:
         """Given an artifact (tarball, file), yield one ListedOrigin."""
         assert self.lister_obj.id is not None
-        loader_arguments = {
+        loader_arguments: Dict[str, Any] = {
             "checksums": artifact.checksums,
             "checksum_layout": artifact.checksum_layout.value,
             "fallback_urls": artifact.fallback_urls,
         }
         if artifact.ref:
             loader_arguments["ref"] = artifact.ref
+        if artifact.submodules:
+            loader_arguments["submodules"] = artifact.submodules
         yield ListedOrigin(
             lister_id=self.lister_obj.id,
             url=artifact.origin,
diff --git a/swh/lister/nixguix/tests/data/sources-success.json b/swh/lister/nixguix/tests/data/sources-success.json
index b6a40f49..f195e625 100644
--- a/swh/lister/nixguix/tests/data/sources-success.json
+++ b/swh/lister/nixguix/tests/data/sources-success.json
@@ -2,17 +2,23 @@
   "sources": [
     {
       "type": "url",
-      "urls": [ "https://github.com/owner-1/repository-1/revision-1.tgz" ],
+      "urls": [
+        "https://github.com/owner-1/repository-1/revision-1.tgz"
+      ],
       "integrity": "sha256-3vm2Nt+O4zHf3Ovd/qsv1gKTEUwodX9FLxlrQdry0zs="
     },
     {
       "type": "url",
-      "urls": [ "https://github.com/owner-3/repository-1/revision-1.tar" ],
+      "urls": [
+        "https://github.com/owner-3/repository-1/revision-1.tar"
+      ],
       "integrity": "sha256-3vm2Nt+O4zHf3Ovd/qsv1gKTEUwodX9FLxlrQdry0zs="
     },
     {
       "type": "url",
-      "urls": [ "https://example.com/file.txt" ],
+      "urls": [
+        "https://example.com/file.txt"
+      ],
       "integrity": "sha256-Q0copBCnj1b8G1iZw1k0NuYasMcx6QctleltspAgXlM="
     },
     {
@@ -99,7 +105,9 @@
     },
     {
       "type": "url",
-      "urls": ["svn://svn.code.sf.net/p/acme-crossass/code-0/trunk"],
+      "urls": [
+        "svn://svn.code.sf.net/p/acme-crossass/code-0/trunk"
+      ],
       "integrity": "sha256-VifIQ+UEVMKJ+cNS+Xxusazinr5Cgu1lmGuhqj/5Mpk="
     },
     {
@@ -311,8 +319,17 @@
       "outputHashAlgo": "sha256",
       "outputHashMode": "recursive",
       "svn_revision": "1550"
+    },
+    {
+      "type": "git",
+      "git_url": "https://github.com/supercollider/supercollider",
+      "integrity": "sha256-YSXpITazkV/IqMquPfj0hC7oRS2yH399IFJU4qmyd7Y=",
+      "outputHashAlgo": "sha256",
+      "outputHashMode": "recursive",
+      "git_ref": "Version-3.13.0",
+      "submodule": true
     }
   ],
   "version": "1",
   "revision": "cc4e04c26672dd74e5fd0fecb78b435fb55368f7"
-}
+}
\ No newline at end of file
diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py
index eb3c2130..6f304888 100644
--- a/swh/lister/nixguix/tests/test_lister.py
+++ b/swh/lister/nixguix/tests/test_lister.py
@@ -225,6 +225,10 @@ def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock):
         "https://api.github.com/repos/trie/trie",
         [{"json": {"html_url": "https://github.com/trie/trie.git"}}],
     )
+    requests_mock.get(
+        "https://api.github.com/repos/supercollider/supercollider",
+        [{"json": {"html_url": "https://github.com/supercollider/supercollider"}}],
+    )
     requests_mock.head(
         "http://git.marmaro.de/?p=mmh;a=snapshot;h=431604647f89d5aac7b199a7883e98e56e4ccf9e;sf=tgz",
         headers={"Content-Type": "application/gzip; charset=ISO-8859-1"},
@@ -314,7 +318,7 @@ def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock):
     # 3 origins have their recursive hash mentioned, they are sent both as vcs and as
     # specific vcs directory to ingest. So they are duplicated with visit_type 'git' and
     # 'git-checkout', 'svn' and 'svn-export', 'hg' and 'hg-checkout'.
-    expected_nb_dictincts_origins = expected_nb_origins - 3
+    expected_nb_dictincts_origins = expected_nb_origins - 4
 
     # 1 page read is 1 origin
     assert listed_result == ListerStats(
@@ -333,6 +337,7 @@ def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock):
         "https://example.org/rgerganov/footswitch",
         "https://hg.sr.ht/~olly/yoyo",
         "svn://svn.savannah.gnu.org/apl/trunk",
+        "https://github.com/supercollider/supercollider",
     ]:
         duplicated_visit_types.extend(
             [
@@ -342,7 +347,7 @@ def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock):
             ]
         )
 
-    assert len(duplicated_visit_types) == 6
+    assert len(duplicated_visit_types) == 8
     assert set(duplicated_visit_types) == {
         "git",
         "git-checkout",
@@ -361,6 +366,10 @@ def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock):
 
         if listed_origin.visit_type in {"git-checkout", "svn-export", "hg-checkout"}:
             assert listed_origin.extra_loader_arguments["ref"] is not None
+            if listed_origin.url == "https://github.com/supercollider/supercollider":
+                assert listed_origin.extra_loader_arguments["submodules"] is True
+            else:
+                assert "submodules" not in listed_origin.extra_loader_arguments
 
         mapping_visit_types[listed_origin.visit_type] += 1
 
-- 
GitLab