From ceb1b6450ea467379a1d5395a06eedbed6510b9c Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Mon, 3 Mar 2025 14:31:21 +0100 Subject: [PATCH] gnu: Fix KeyError exception due to missing field in JSON data Latest GNU JSON listing is missing the contents field for a directory so a KeyError exception was raised by the lister. --- .../tests/data/https_ftp.gnu.org/tree.json.gz | Bin 622168 -> 622194 bytes swh/lister/gnu/tests/data/tree.json | 3 ++- swh/lister/gnu/tree.py | 8 +++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/swh/lister/gnu/tests/data/https_ftp.gnu.org/tree.json.gz b/swh/lister/gnu/tests/data/https_ftp.gnu.org/tree.json.gz index 34b3b28a09e0c92683ef85c046441ecdd6aa9c23..21424731e26ff05ba327b84012d26e4e3af393d2 100644 GIT binary patch delta 123 zcmV->0EGY8{3Y`IB@`cv2ng$~#b*EmbaG{7E^2dcZUBJ_g$e<M3Ic@+1BD6%g$f0Q z3I>G=2Zag<wF(Iyen7kyL=9zSZ82JPPPi)MkG(bW!~ONUdqX^|@Zlc*mlx>u>ewZ; dgRPDGJ|Yi+W&i*D%m4nD{|}#1b6`~oU;t<iH3a|w delta 97 zcmV-n0G|Ky{3Y1@B?=#l2mo_2mt6n@ffR)l0fiI-g%ksY6a<A71%(s_g%k&c6bQ8x z2_AkhWPSQr5BCOkSm8@M{4XyVTI<!Z%RmR?7#BL?M+Z~<|M{2y{V)F?>226L1_@vQ DQ%5Ah diff --git a/swh/lister/gnu/tests/data/tree.json b/swh/lister/gnu/tests/data/tree.json index e4a99d4b..1f2bb9fa 100644 --- a/swh/lister/gnu/tests/data/tree.json +++ b/swh/lister/gnu/tests/data/tree.json @@ -69,5 +69,6 @@ {"type":"file","name":"xboard-4.2.5.tar.gz","size":1055502,"time":"1008466945"}, {"type":"file","name":"xboard-4.2.6.tar.gz","size":1057625,"time":"1012641715"}, {"type":"file","name":"xboard-4.2.7.tar.gz","size":1318110,"time":"1070057764"} - ]} + ]}, + {"type":"directory","name":"no-contents","size":4096,"time":"1254860068"} ] diff --git a/swh/lister/gnu/tree.py b/swh/lister/gnu/tree.py index ec48cf08..26e4f2b8 100644 --- a/swh/lister/gnu/tree.py +++ b/swh/lister/gnu/tree.py @@ -61,7 +61,7 @@ class GNUTree: for directory in raw_data["contents"]: if directory["name"] not in self.top_level_directories: continue - infos = directory["contents"] + infos = directory.get("contents", []) for info in infos: if info["type"] == "directory": package_url = "%s/%s/%s/" % ( @@ -69,7 +69,9 @@ class GNUTree: directory["name"], info["name"], ) - package_artifacts = find_artifacts(info["contents"], package_url) + package_artifacts = find_artifacts( + info.get("contents", []), package_url + ) if package_artifacts != []: repo_details = { "name": info["name"], @@ -146,7 +148,7 @@ def find_artifacts( # It will recursively check for artifacts in all sub-folders elif filetype == "directory": tarballs_in_dir = find_artifacts( - info_file["contents"], url + filename + "/" + info_file.get("contents", []), url + filename + "/" ) artifacts.extend(tarballs_in_dir) -- GitLab