From db3c2a1833d13e096d12e707180e26a783746dd6 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Fri, 24 Mar 2023 10:23:39 +0100
Subject: [PATCH] Add basic support for Gitweb in the cgit lister

---
 swh/lister/cgit/lister.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
index 4a9aeab9..a74e6bcd 100644
--- a/swh/lister/cgit/lister.py
+++ b/swh/lister/cgit/lister.py
@@ -94,9 +94,11 @@ class CGitLister(StatelessLister[Repositories]):
 
             page_results = []
 
-            for tr in bs_idx.find("div", {"class": "content"}).find_all(
-                "tr", {"class": ""}
-            ):
+            for tr in bs_idx.find(
+                "table", {"class": re.compile("(list|project_list)")}
+            ).find_all("tr"):
+                if not tr.find("a"):
+                    continue
                 repository_link = tr.find("a")["href"]
                 repo_url = None
                 git_url = None
@@ -189,6 +191,13 @@ class CGitLister(StatelessLister[Repositories]):
         # <link rel='vcs-git' href='https://...' title='...'/>
         urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})]
 
+        if not urls:
+            # Gitweb fallback: use the last child of each "metadata_url" row
+            urls = [
+                row.contents[-1].string
+                for row in bs.find_all("tr", {"class": "metadata_url"})
+            ]
+
         if not urls:
             logger.debug("No git urls found on %s", repository_url)
             return None
-- 
GitLab