From ceb1b6450ea467379a1d5395a06eedbed6510b9c Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Mon, 3 Mar 2025 14:31:21 +0100
Subject: [PATCH] gnu: Fix KeyError exception due to missing field in JSON data

The latest GNU JSON listing omits the "contents" field for a directory,
which caused the lister to raise a KeyError. Treat a missing "contents"
field as an empty list instead.
---
 .../tests/data/https_ftp.gnu.org/tree.json.gz | Bin 622168 -> 622194 bytes
 swh/lister/gnu/tests/data/tree.json           |   3 ++-
 swh/lister/gnu/tree.py                        |   8 +++++---
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/swh/lister/gnu/tests/data/https_ftp.gnu.org/tree.json.gz b/swh/lister/gnu/tests/data/https_ftp.gnu.org/tree.json.gz
index 34b3b28a09e0c92683ef85c046441ecdd6aa9c23..21424731e26ff05ba327b84012d26e4e3af393d2 100644
GIT binary patch
delta 123
zcmV->0EGY8{3Y`IB@`cv2ng$~#b*EmbaG{7E^2dcZUBJ_g$e<M3Ic@+1BD6%g$f0Q
z3I>G=2Zag<wF(Iyen7kyL=9zSZ82JPPPi)MkG(bW!~ONUdqX^|@Zlc*mlx>u>ewZ;
dgRPDGJ|Yi+W&i*D%m4nD{|}#1b6`~oU;t<iH3a|w

delta 97
zcmV-n0G|Ky{3Y1@B?=#l2mo_2mt6n@ffR)l0fiI-g%ksY6a<A71%(s_g%k&c6bQ8x
z2_AkhWPSQr5BCOkSm8@M{4XyVTI<!Z%RmR?7#BL?M+Z~<|M{2y{V)F?>226L1_@vQ
DQ%5Ah

diff --git a/swh/lister/gnu/tests/data/tree.json b/swh/lister/gnu/tests/data/tree.json
index e4a99d4b..1f2bb9fa 100644
--- a/swh/lister/gnu/tests/data/tree.json
+++ b/swh/lister/gnu/tests/data/tree.json
@@ -69,5 +69,6 @@
         {"type":"file","name":"xboard-4.2.5.tar.gz","size":1055502,"time":"1008466945"},
         {"type":"file","name":"xboard-4.2.6.tar.gz","size":1057625,"time":"1012641715"},
         {"type":"file","name":"xboard-4.2.7.tar.gz","size":1318110,"time":"1070057764"}
-      ]}
+      ]},
+      {"type":"directory","name":"no-contents","size":4096,"time":"1254860068"}
 ]
diff --git a/swh/lister/gnu/tree.py b/swh/lister/gnu/tree.py
index ec48cf08..26e4f2b8 100644
--- a/swh/lister/gnu/tree.py
+++ b/swh/lister/gnu/tree.py
@@ -61,7 +61,7 @@ class GNUTree:
         for directory in raw_data["contents"]:
             if directory["name"] not in self.top_level_directories:
                 continue
-            infos = directory["contents"]
+            infos = directory.get("contents", [])
             for info in infos:
                 if info["type"] == "directory":
                     package_url = "%s/%s/%s/" % (
@@ -69,7 +69,9 @@ class GNUTree:
                         directory["name"],
                         info["name"],
                     )
-                    package_artifacts = find_artifacts(info["contents"], package_url)
+                    package_artifacts = find_artifacts(
+                        info.get("contents", []), package_url
+                    )
                     if package_artifacts != []:
                         repo_details = {
                             "name": info["name"],
@@ -146,7 +148,7 @@ def find_artifacts(
         # It will recursively check for artifacts in all sub-folders
         elif filetype == "directory":
             tarballs_in_dir = find_artifacts(
-                info_file["contents"], url + filename + "/"
+                info_file.get("contents", []), url + filename + "/"
             )
             artifacts.extend(tarballs_in_dir)
 
-- 
GitLab