GitHub archives cannot be downloaded if there is a tag and a branch with the same name
For example, when loading "v1.0.2" from https://github.com/gmalecha/mirror-core, which has both a tag and a branch with that name:
https://sentry.softwareheritage.org/organizations/swh/issues/104637/?referrer=phabricator_plugin
ReadError: file could not be opened successfully:
- method gz: ReadError('not a gzip file')
- method bz2: ReadError('not a bzip2 file')
- method xz: ReadError('not an lzma file')
- method tar: ReadError('truncated header')
File "shutil.py", line 1201, in _unpack_tarfile
tarobj = tarfile.open(filename)
File "tarfile.py", line 1629, in open
raise ReadError(f"file could not be opened successfully:\n{error_msgs_summary}")
ReadError: /tmp/tmpbg1djs3i/v1.0.2.tar.gz is not a compressed or uncompressed tar file
File "swh/core/tarball.py", line 161, in uncompress
shutil.unpack_archive(tarpath, extract_dir=dest, format=format)
File "shutil.py", line 1264, in unpack_archive
func(filename, extract_dir, **dict(format_info[2]))
File "shutil.py", line 1203, in _unpack_tarfile
raise ReadError(
ValueError: Problem during unpacking /tmp/tmpbg1djs3i/v1.0.2.tar.gz. Reason: /tmp/tmpbg1djs3i/v1.0.2.tar.gz is not a compressed or uncompressed tar file
File "swh/loader/package/loader.py", line 649, in load
res = self._load_release(p_info, origin)
File "swh/loader/package/loader.py", line 824, in _load_release
(uncompressed_path, directory) = self._load_directory(dl_artifacts, tmpdir)
File "swh/loader/package/loader.py", line 787, in _load_directory
uncompressed_path = self.uncompress(dl_artifacts, dest=tmpdir)
File "swh/loader/package/loader.py", line 424, in uncompress
uncompress(a_path, dest=uncompressed_path)
File "swh/core/tarball.py", line 163, in uncompress
raise ValueError(f"Problem during unpacking {tarpath}. Reason: {e}")
Migrated from T4566 (view on Phabricator)