diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
index 88957a941b49d4ba55c97511cc9ff46903370098..94fff20860de6a81766286e2ac1621c61ee52b90 100644
--- a/swh/loader/pypi/model.py
+++ b/swh/loader/pypi/model.py
@@ -100,6 +100,52 @@ class PyPiProject:
         """
         return author(self._data(release_name))
 
+    def _parse_release_artifact(self, version, release):
+        """Heuristically choose the artifact to load for a release.
+
+        Only 'sdist' (source) artifacts are considered. The other package
+        types (bdist_egg, bdist_wheel, bdist_rpm, bdist_msi,
+        bdist_wininst) are ignored (for now?).
+
+        Args:
+            version (str): release version, only used in log messages
+            release: list of artifact dicts, each holding at least the
+                'packagetype' and 'filename' keys. Any other value is
+                returned as is.
+
+        Returns:
+            The first 'sdist' artifact of the list, None when the list
+            holds no 'sdist' artifact, or release itself when it is
+            not a list.
+
+        """
+        if not isinstance(release, list):
+            return release
+
+        if len(release) > 1:
+            logging.warning(
+                '%s %s: Multiple release artifacts (%s)',
+                self.project, version,
+                [(rel['packagetype'], rel['filename']) for rel in release])
+
+        # Only keep the 'sdist' package type (source code)
+        sdist = [rel for rel in release if rel['packagetype'] == 'sdist']
+        if not sdist:
+            logging.warning(
+                '%s %s: No source artifact found, skipping',
+                self.project, version)
+            return None
+
+        if len(sdist) > 1:
+            logging.warning(
+                '%s %s: Multiple sdist files detected (%s)!',
+                self.project, version,
+                ','.join(rel['filename'] for rel in sdist))
+
+        # FIXME: take the first one?
+        # Pick from the filtered list: release[0] may be a binary artifact
+        return sdist[0]
+
     def releases(self):
         """Fetch metadata and data per release.
 
@@ -118,15 +164,12 @@ class PyPiProject:
             else:
                 release_info = self.info(release_name=version)
 
-            # FIXME: there can be multiple 'package_type' here:
-            # sdist, bdist_egg, bdist_wheel
-            if isinstance(release, list):
-                if not release:
-                    continue
-                if len(release) > 1:
-                    raise ValueError(
-                        'Unsupported other formats for now, failing!')
-                release = release[0]
+            if not release:
+                continue
+
+            release = self._parse_release_artifact(version, release)
+            if not release:  # no source artifact found for that version
+                continue
 
             # flatten the metadata to ease reading
             _flattenned_release = {