From db3c2a1833d13e096d12e707180e26a783746dd6 Mon Sep 17 00:00:00 2001 From: Nicolas Dandrimont <nicolas@dandrimont.eu> Date: Fri, 24 Mar 2023 10:23:39 +0100 Subject: [PATCH] Add basic support for Gitweb in the cgit lister --- swh/lister/cgit/lister.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py index 4a9aeab9..a74e6bcd 100644 --- a/swh/lister/cgit/lister.py +++ b/swh/lister/cgit/lister.py @@ -94,9 +94,11 @@ class CGitLister(StatelessLister[Repositories]): page_results = [] - for tr in bs_idx.find("div", {"class": "content"}).find_all( - "tr", {"class": ""} - ): + for tr in bs_idx.find( + "table", {"class": re.compile("(list|project_list)")} + ).find_all("tr"): + if not tr.find("a"): + continue repository_link = tr.find("a")["href"] repo_url = None git_url = None @@ -189,6 +191,13 @@ class CGitLister(StatelessLister[Repositories]): # <link rel='vcs-git' href='https://...' title='...'/> urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})] + if not urls: + # Try the Gitweb heuristic + urls = [ + row.contents[-1].string + for row in bs.find_all("tr", {"class": "metadata_url"}) + ] + if not urls: logger.debug("No git urls found on %s", repository_url) return None -- GitLab