From 203f6db8f037be7adc6d590b3c698f0b5ae7f16c Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <ardumont@softwareheritage.org> Date: Wed, 2 Aug 2023 17:28:34 +0200 Subject: [PATCH] packagist: Randomize the packages list To avoid always starting the packages list in the same order when problems occurred in a previous listing. --- swh/lister/packagist/lister.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py index 26f529c3..c4faeab9 100644 --- a/swh/lister/packagist/lister.py +++ b/swh/lister/packagist/lister.py @@ -7,6 +7,7 @@ from dataclasses import dataclass from datetime import datetime, timezone import logging from typing import Any, Dict, Iterator, List, Optional +from random import shuffle import iso8601 import requests @@ -123,9 +124,11 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]): def get_pages(self) -> Iterator[PackagistPageType]: """ - Yield a single page listing all Packagist projects. + Yield a single page listing all Packagist projects (randomly). """ - yield self.api_request(self.url)["packageNames"] + package_names = self.api_request(self.url)["packageNames"] + shuffle(package_names) + yield package_names def _get_metadata_from_page( self, package_url_format: str, package_name: str -- GitLab