Skip to content
Snippets Groups Projects
Unverified Commit d2390dc4 authored by Antoine R. Dumont
Browse files

loader.pypi: Store pypi origin's releases in storage

Related T421
parent 6a71db66
No related branches found
No related tags found
No related merge requests found
......@@ -3,6 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import arrow
import hashlib
import logging
import os
......@@ -14,8 +15,10 @@ from swh.loader.core.utils import clean_dangling_folders
from swh.loader.core.loader import SWHStatelessLoader
from swh.model import hashutil
from swh.model.from_disk import Directory
from swh.model.identifiers import (release_identifier, revision_identifier,
snapshot_identifier, identifier_to_bytes)
from swh.model.identifiers import (
release_identifier, revision_identifier, snapshot_identifier,
identifier_to_bytes, normalize_timestamp
)
from .model import PyPiProject
......@@ -89,8 +92,7 @@ class PyPiClient:
"""
import gzip
from json import dumps
from arrow import utcnow
datepath = utcnow().isoformat()
datepath = arrow.utcnow().isoformat()
fname = os.path.join(self.cache_dir, datepath + '.gz')
with gzip.open(fname, 'w') as f:
f.write(bytes(
......@@ -115,13 +117,18 @@ class PyPiClient:
"""Given a dictionary of releases, retrieve them locally.
"""
_releases = releases.copy()
for version, release in releases.items():
logging.debug('version: %s' % version)
# process the releases in sorted version-key order (intended to follow time order)
_release_versions = list(releases.keys())
_release_versions.sort()
for version in _release_versions:
release = releases[version]
_release = release.copy()
logging.debug('Release version: %s' % version)
path = os.path.join(self.temp_directory, project, version)
os.makedirs(path, exist_ok=True)
filepath = os.path.join(path, release['filename'])
logging.debug('filepath to write: %s' % filepath)
logging.debug('Release local path: %s' % filepath)
r = self.session.get(release['url'])
if not r.ok:
......@@ -150,14 +157,14 @@ class PyPiClient:
os.makedirs(uncompress_path, exist_ok=True)
nature = tarball.uncompress(filepath, uncompress_path)
_releases[version]['directory'] = uncompress_path
_release['directory'] = uncompress_path
artifact = convert_to_hex(hashutil.hash_path(filepath))
artifact['archive_type'] = nature
for key, value in artifact.items():
_releases[version][key] = value
_release[key] = value
return _releases
yield version, _release
class PyPiLoader(SWHStatelessLoader):
......@@ -222,22 +229,27 @@ class PyPiLoader(SWHStatelessLoader):
self.origin_metadata_url = origin_metadata_url
def get_contents(self):
return self.contents
return self._contents
def get_directories(self):
return self.directories()
return self._directories
def get_revisions(self):
return self.revisions
return self._revisions
def get_releases(self):
return self.releases
return self._releases
def get_snapshot(self):
return self.snapshot
return self._snapshot
def fetch_data(self):
"""(override) Retrieve the pypi origin's information
"""(override) Compute pypi data:
- 1. Retrieve project information
- 2. Fetch the releases and uncompress them
- 3. Collect object information (contents, directories,
revisions, releases, snapshot)
"""
project_info = self.pypi_client.info(self.origin_metadata_url)
......@@ -255,16 +267,22 @@ class PyPiLoader(SWHStatelessLoader):
'branches': {}
}
# for each release
for version, release in releases.items():
_last_rev = None
for version, release in releases:
_dir_path = release.pop('directory')
directory = Directory.from_disk(path=_dir_path.encode('utf-8'),
save_path=True)
_dir_path = _dir_path.encode('utf-8')
directory = Directory.from_disk(path=_dir_path, data=True)
_objects = directory.collect()
_contents.append(_objects['content'].values())
_directories.append(_objects['directory'].values())
_contents.extend(_objects['content'].values())
_directories.extend(_objects['directory'].values())
date = normalize_timestamp(
int(arrow.get(release['date']).timestamp))
name = release['name'].encode('utf-8')
message = release['message'].encode('utf-8')
_revision = {
'synthetic': True,
'metadata': {
......@@ -272,44 +290,43 @@ class PyPiLoader(SWHStatelessLoader):
'project': info,
},
'author': author,
'date': release['date'],
'date': date,
'committer': author,
'committer_date': release['date'],
'name': release['name'],
'message': release['message'],
'committer_date': date,
'name': name,
'message': message,
'directory': directory.hash,
'parents': [],
'parents': [] if _last_rev is None else [_last_rev['id']],
'type': 'tar',
}
_revision['id'] = identifier_to_bytes(
revision_identifier(_revision))
_revisions.append(_revision)
_last_rev = _revision
_release = {
'name': release['name'],
'name': name,
'author': author,
'date': release['date'],
'message': release['message'],
'date': date,
'message': message,
'target_type': 'revision',
'target': _revision['id'],
'synthetic': False,
}
_release['id'] = identifier_to_bytes(
release_identifier(_release))
_releases.append(_release)
_snapshot['branches'][release['name']] = {
_snapshot['branches'][name] = {
'target': _release['id'],
'target_type': 'release',
}
logging.debug('version: %s' % version)
logging.debug('release: %s' % release['directory'])
_snapshot['id'] = identifier_to_bytes(
snapshot_identifier(_snapshot))
self.contents = _contents
self.directories = _directories
self.revisions = _revisions
self.releases = _releases
self.snapshot = _snapshot
self._contents = _contents
self._directories = _directories
self._revisions = _revisions
self._releases = _releases
self._snapshot = _snapshot
......@@ -4,6 +4,9 @@
# See top-level LICENSE file for more information
import logging
class PyPiProject:
"""PyPi project representation
......@@ -23,15 +26,20 @@ class PyPiProject:
}
def author(self):
name = self.data['info']['author'].encode('utf-8')
email = self.data['info']['author_email'].encode('utf-8')
return {
'fullname': self.data['info']['author'],
'name': self.data['info']['author'],
'email': self.data['info']['author_email']
'fullname': name,
'name': name,
'email': email,
}
def releases(self):
releases = {}
for version, release in self.data['releases'].items():
logging.debug('version: %s, release: %s' % (version, release))
# FIXME: there can be multiple 'package_type' here:
# sdist, bdist_egg, bdist_wheel
if isinstance(release, list):
if len(release) > 1:
raise ValueError( # unexpected so fail so that we
......
......@@ -38,10 +38,12 @@ class ModelTest(TestCase):
def author(self):
actual_author = self.project.author()
name = self.data['info']['author'].encode('utf-8')
email = self.data['info']['author_email'].encode('utf-8')
expected_author = {
'fullname': self.data['info']['author'],
'name': self.data['info']['author'],
'email': self.data['info']['author_email'],
'fullname': name,
'name': name,
'email': email,
}
self.assertEqual(expected_author, actual_author)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment