From f236f3d16368ef5cde51cc710337590d7a780204 Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <ardumont@softwareheritage.org> Date: Wed, 2 Aug 2023 17:30:00 +0200 Subject: [PATCH] packagist: Continue listing when github server hangs up The listing currently fails to finish when the GitHub server hangs up on the process. This handles the failure both with and without retry (the retry case is for a future version of swh.core). When the hang-up sporadically happens, the affected origin is skipped. It should get picked up by another listing eventually. Skipping the origin lets the listing continue instead of breaking it. --- swh/lister/packagist/lister.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py index c4faeab9..1dd05379 100644 --- a/swh/lister/packagist/lister.py +++ b/swh/lister/packagist/lister.py @@ -6,11 +6,12 @@ from dataclasses import dataclass from datetime import datetime, timezone import logging -from typing import Any, Dict, Iterator, List, Optional from random import shuffle +from typing import Any, Dict, Iterator, List, Optional import iso8601 import requests +from tenacity import RetryError from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin @@ -248,9 +249,14 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]): # Non-github urls will be returned as is, github ones will be canonical # ones assert self.github_session is not None - origin_url = ( - self.github_session.get_canonical_url(origin_url) or origin_url - ) + try: + origin_url = ( + self.github_session.get_canonical_url(origin_url) or origin_url + ) + except (requests.exceptions.ConnectionError, RetryError): + # server hangs up, let's ignore it for now + # that might not happen later on + continue # bitbucket closed its mercurial hosting service, those origins can not be # loaded into the archive anymore -- GitLab