From f236f3d16368ef5cde51cc710337590d7a780204 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <ardumont@softwareheritage.org>
Date: Wed, 2 Aug 2023 17:30:00 +0200
Subject: [PATCH] packagist: Continue listing when github server hangs up

This covers the failure both without retry (requests' ConnectionError) and with retry
(tenacity's RetryError, for a future version of swh.core).

When this sporadically happens, the affected origin is skipped. It should get picked up
by a later listing eventually.

The listing is currently failing to finish when the GitHub server hangs up on the
process. Adding this behavior allows the lister to skip the affected origin without
breaking the listing.
---
 swh/lister/packagist/lister.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py
index c4faeab9..1dd05379 100644
--- a/swh/lister/packagist/lister.py
+++ b/swh/lister/packagist/lister.py
@@ -6,11 +6,12 @@
 from dataclasses import dataclass
 from datetime import datetime, timezone
 import logging
-from typing import Any, Dict, Iterator, List, Optional
 from random import shuffle
+from typing import Any, Dict, Iterator, List, Optional
 
 import iso8601
 import requests
+from tenacity import RetryError
 
 from swh.scheduler.interface import SchedulerInterface
 from swh.scheduler.model import ListedOrigin
@@ -248,9 +249,14 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
                 # Non-github urls will be returned as is, github ones will be canonical
                 # ones
                 assert self.github_session is not None
-                origin_url = (
-                    self.github_session.get_canonical_url(origin_url) or origin_url
-                )
+                try:
+                    origin_url = (
+                        self.github_session.get_canonical_url(origin_url) or origin_url
+                    )
+                except (requests.exceptions.ConnectionError, RetryError):
+                    # The server hung up: skip this origin for now, the
+                    # failure might not happen again on a later listing.
+                    continue
 
             # bitbucket closed its mercurial hosting service, those origins can not be
             # loaded into the archive anymore
-- 
GitLab