From 903ff367ec2768ae942fd0e7844e5dcfc852d16d Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <ardumont@softwareheritage.org>
Date: Wed, 2 Aug 2023 16:24:00 +0200
Subject: [PATCH] packagist: Fix JSON parsing, as the format differs depending
 on the page

---
 swh/lister/packagist/lister.py                | 12 +++-
 .../tests/data/ljjackson_linnworks.json       | 68 +++++--------------
 swh/lister/packagist/tests/test_lister.py     |  2 +-
 3 files changed, 30 insertions(+), 52 deletions(-)

diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py
index 5f7a4a53..26f529c3 100644
--- a/swh/lister/packagist/lister.py
+++ b/swh/lister/packagist/lister.py
@@ -143,6 +143,7 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
             metadata_url = package_url_format.format(package_name=package_name)
             metadata = self.api_request(metadata_url)
             packages = metadata.get("packages", {})
+            format_json = metadata.get("minified")
             if not packages:
                 # package metadata not updated since last listing
                 return None
@@ -150,7 +151,16 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
             if package_info is None:
                 # missing package metadata in response
                 return None
-            return package_info.values()  # could be an empty response though -> []
+            logger.debug(
+                "package-name: %s, package-info: %s", package_name, package_info
+            )
+            if format_json == "composer/2.0":  # /p2/ output
+                # In that format, the package info is a list of dicts, one per
+                # package version
+                return package_info
+            else:
+                # Otherwise, the /p/ and /packages/ URLs return a dict output
+                return package_info.values()
         except requests.HTTPError:
             # error when getting package metadata (usually 404 when a package has
             # been removed), skip it and process next package
diff --git a/swh/lister/packagist/tests/data/ljjackson_linnworks.json b/swh/lister/packagist/tests/data/ljjackson_linnworks.json
index ba57a81b..41c9207f 100644
--- a/swh/lister/packagist/tests/data/ljjackson_linnworks.json
+++ b/swh/lister/packagist/tests/data/ljjackson_linnworks.json
@@ -1,7 +1,7 @@
 {
   "packages": {
-    "ljjackson/linnworks": {
-      "0.1": {
+    "ljjackson/linnworks": [
+      {
         "name": "ljjackson/linnworks",
         "description": "A PHP API Integration of Linnworks.",
         "keywords": [],
@@ -9,11 +9,13 @@
         "version": "0.1",
         "version_normalized": "0.1.0.0",
         "license": [],
-        "authors": [{
-          "name": "Liam Jackson",
-          "homepage": "https://github.com/ljjackson",
-          "role": "Developer"
-        }],
+        "authors": [
+          {
+            "name": "Liam Jackson",
+            "homepage": "https://github.com/ljjackson",
+            "role": "Developer"
+          }
+        ],
         "source": {
           "type": "git",
           "url": "https://github.com/ljjackson/linnworks.git",
@@ -37,47 +39,13 @@
           "guzzlehttp/guzzle": "^6.3",
           "ext-json": "*"
         },
-        "uid": 2535139
-      },
-      "dev-master": {
-        "name": "ljjackson/linnworks",
-        "description": "A PHP API Integration of Linnworks.",
-        "keywords": [],
-        "homepage": "https://github.com/ljjackson",
-        "version": "dev-master",
-        "version_normalized": "9999999-dev",
-        "license": [],
-        "authors": [{
-          "name": "Liam Jackson",
-          "homepage": "https://github.com/ljjackson",
-          "role": "Developer"
-        }],
-        "source": {
-          "type": "git",
-          "url": "https://github.com/ljjackson/linnworks.git",
-          "reference": "7c6b1209dc3bafad4284b130bda8450f3478ea26"
-        },
-        "dist": {
-          "type": "zip",
-          "url": "https://api.github.com/repos/ljjackson/linnworks/zipball/7c6b1209dc3bafad4284b130bda8450f3478ea26",
-          "reference": "7c6b1209dc3bafad4284b130bda8450f3478ea26",
-          "shasum": ""
-        },
-        "type": "library",
-        "time": "2018-11-01T21:45:50+00:00",
-        "autoload": {
-          "psr-4": {
-            "LJJackson\\Linnworks\\": "src/"
-          }
-        },
-        "require": {
-          "guzzlehttp/guzzle": "^6.3",
-          "ext-json": "*",
-          "php": "^7.1.3",
-          "nesbot/carbon": "*"
-        },
-        "uid": 2517334
+        "abandoned": true,
+        "support": {
+          "issues": "https://github.com/ljjackson/linnworks/issues",
+          "source": "https://github.com/ljjackson/linnworks/tree/0.1"
+        }
       }
-    }
-  }
-}
\ No newline at end of file
+    ]
+  },
+  "minified": "composer/2.0"
+}
diff --git a/swh/lister/packagist/tests/test_lister.py b/swh/lister/packagist/tests/test_lister.py
index fa8cdfcf..5591314d 100644
--- a/swh/lister/packagist/tests/test_lister.py
+++ b/swh/lister/packagist/tests/test_lister.py
@@ -66,7 +66,7 @@ def test_packagist_lister(swh_scheduler, requests_mock, datadir, requests_mock_d
         (
             "https://github.com/ljjackson/linnworks.git",  # API goes 404
             "git",
-            datetime.datetime.fromisoformat("2018-11-01T21:45:50+00:00"),
+            datetime.datetime.fromisoformat("2018-10-22T19:52:25+00:00"),
         ),
         (
             "https://github.com/spryker-eco/computop-api",  # SSH URL in manifest
-- 
GitLab