diff --git a/swh/loader/pypi/client.py b/swh/loader/pypi/client.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad1e74883aa807d6f9f470b16d32ae21bd87de22
--- /dev/null
+++ b/swh/loader/pypi/client.py
@@ -0,0 +1,211 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import arrow
+import hashlib
+import logging
+import os
+import requests
+
+from swh.core import tarball
+from swh.model import hashutil
+
+try:
+    from swh.loader.pypi._version import __version__
+except ImportError:
+    __version__ = 'devel'
+
+
+def convert_to_hex(d):
+    """Convert a flat dictionary with bytes in values to the same dictionary
+    with hex strings as values.
+
+    Args:
+        d (dict): flat dictionary with sha bytes in their values.
+
+    Returns:
+        Mirror dictionary with values as string hex.
+
+    """
+    if not d:
+        return d
+
+    # hex-encode only the bytes values; everything else passes through
+    return {
+        key: hashutil.hash_to_hex(value) if isinstance(value, bytes)
+        else value
+        for key, value in d.items()
+    }
+
+
+class PyPiClient:
+    """PyPi client in charge of discussing with the pypi server.
+
+    Args:
+        temp_directory (str): path used to fetch release artifacts
+            locally; a temporary directory is created when not provided
+        cache (bool): when True, raw server responses are saved to
+            cache_dir
+        cache_dir (str): cache directory (used only when cache is True)
+
+    """
+    def __init__(self, temp_directory=None, cache=False, cache_dir=None):
+        self.version = __version__
+        if not temp_directory:
+            from tempfile import mkdtemp
+            self.temp_directory = mkdtemp(dir=temp_directory,
+                                          prefix='swh.loader.pypi.client')
+        else:
+            self.temp_directory = temp_directory
+
+        self.do_cache = cache
+        if self.do_cache:
+            self.cache_dir = cache_dir
+            os.makedirs(self.cache_dir, exist_ok=True)
+        self.session = requests.session()
+        self.params = {
+            'headers': {
+                'User-Agent': 'Software Heritage PyPi Loader (%s)' % (
+                    __version__
+                )
+            }
+        }
+
+    def _save_response(self, response):
+        """Log the response from a server request to the cache dir.
+
+        Args:
+            response: full server response
+
+        Returns:
+            nothing
+
+        """
+        import gzip
+        from json import dumps
+        datepath = arrow.utcnow().isoformat()
+        fname = os.path.join(self.cache_dir, datepath + '.gz')
+        with gzip.open(fname, 'w') as f:
+            f.write(bytes(
+                dumps(response.json()),
+                'utf-8'
+            ))
+
+    def _get(self, url):
+        """Get query to the url.
+
+        Args:
+            url (str): url to retrieve
+
+        Raises:
+            ValueError when the server does not answer 200 OK
+
+        """
+        response = self.session.get(url, **self.params)
+        if response.status_code != 200:
+            # fixed: this used to reference self.origin_url, which is
+            # not an attribute of PyPiClient (AttributeError on error)
+            raise ValueError('Error during query request %s' % url)
+
+        if self.do_cache:
+            self._save_response(response)
+
+        return response.json()
+
+    def info(self, project_url):
+        """Given a metadata project url, retrieve the raw json response
+
+        """
+        return self._get(project_url)
+
+    def release(self, project, release):
+        """Given a project and a release name, retrieve the raw json response
+
+        """
+        release_url = 'https://pypi.org/pypi/%s/%s/json' % (project, release)
+        return self._get(release_url)
+
+    def fetch_release(self, project, release):
+        """Fetch one release artifact locally: download it, check its
+        size and sha256 digest, uncompress it, and enrich the release
+        dict with the uncompress directory and artifact checksums.
+
+        Args:
+            project (str): project name
+            release (dict): flattened release metadata with at least
+                the keys name, filename, url, size, sha256
+
+        Returns:
+            the same release dict, enriched in place
+
+        Raises:
+            ValueError when the download fails or a size/digest check
+            does not match
+
+        """
+        version = release['name']
+        logging.debug('Release version: %s' % version)
+        path = os.path.join(self.temp_directory, project, version)
+        os.makedirs(path, exist_ok=True)
+        filepath = os.path.join(path, release['filename'])
+        logging.debug('Release local path: %s' % filepath)
+
+        r = self.session.get(release['url'], **self.params)
+        if not r.ok:
+            raise ValueError('Fail to retrieve release %s' % version)
+
+        # checks
+        _len = len(r.content)
+        if _len != release['size']:
+            raise ValueError('Error when checking size: %s != %s' % (
+                release['size'], _len))
+
+        # checking digest and writing
+        h = hashlib.sha256()
+        with open(filepath, 'wb') as f:
+            for chunk in r.iter_content():
+                h.update(chunk)
+                f.write(chunk)
+
+        actual_digest = h.hexdigest()
+        if actual_digest != release['sha256']:
+            raise ValueError(
+                'Error when checking the hash checksum: %s != %s' % (
+                    release['sha256'], actual_digest))
+
+        uncompress_path = os.path.join(path, 'uncompress')
+        os.makedirs(uncompress_path, exist_ok=True)
+
+        nature = tarball.uncompress(filepath, uncompress_path)
+        release['directory'] = uncompress_path
+
+        artifact = convert_to_hex(hashutil.hash_path(filepath))
+        artifact['archive_type'] = nature
+        for key, value in artifact.items():
+            release[key] = value
+
+        return release
+
+    def retrieve_releases(self, project, releases):
+        """Given a dictionary of releases, retrieve them locally.
+
+        Args:
+            project (str): project name
+            releases (dict): mapping version -> flattened release dict
+
+        Yields:
+            (version, release) tuples in ascending version order
+
+        """
+        # deduplicated: the body of this loop was a verbatim copy of
+        # fetch_release; delegate to it instead (input dicts are copied
+        # so callers' release dicts are left untouched)
+        for version in sorted(releases):
+            yield version, self.fetch_release(project,
+                                              releases[version].copy())
diff --git a/swh/loader/pypi/loader.py b/swh/loader/pypi/loader.py
index 0824234cd603f5b9389ba546d481387ab87905ff..ba97c4761ae216aebb6e3a5bbf4d6da34e393dca 100644
--- a/swh/loader/pypi/loader.py
+++ b/swh/loader/pypi/loader.py
@@ -4,169 +4,25 @@
 # See top-level LICENSE file for more information
 
 import arrow
-import hashlib
 import logging
 import os
-import requests
 import shutil
 
-from swh.core import tarball
 from swh.loader.core.utils import clean_dangling_folders
 from swh.loader.core.loader import SWHStatelessLoader
-from swh.model import hashutil
 from swh.model.from_disk import Directory
 from swh.model.identifiers import (
     release_identifier, revision_identifier, snapshot_identifier,
     identifier_to_bytes, normalize_timestamp
 )
 
+from .client import PyPiClient
 from .model import PyPiProject
 
-try:
-    from swh.loader.pypi._version import __version__
-except ImportError:
-    __version__ = 'devel'
-
 
 TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.pypi.'
 
 
-def convert_to_hex(d):
-    """Convert a flat dictionary with bytes in values to the same dictionary
-    with hex as values.
-
-    Args:
-        dict: flat dictionary with sha bytes in their values.
-
-    Returns:
-        Mirror dictionary with values as string hex.
-
-    """
-    if not d:
-        return d
-
-    checksums = {}
-    for key, h in d.items():
-        if isinstance(h, bytes):
-            checksums[key] = hashutil.hash_to_hex(h)
-        else:
-            checksums[key] = h
-
-    return checksums
-
-
-class PyPiClient:
-    """PyPi client in charge of discussing with the pypi server.
-
-    """
-    def __init__(self, temp_directory=None, cache=False, cache_dir=None):
-        self.version = __version__
-        if not temp_directory:
-            from tempfile import mkdtemp
-            self.temp_directory = mkdtemp(dir=temp_directory,
-                                          prefix='swh.loader.pypi.client')
-        else:
-            self.temp_directory = temp_directory
-
-        self.do_cache = cache
-        if self.do_cache:
-            self.cache_dir = cache_dir
-            os.makedirs(self.cache_dir, exist_ok=True)
-        self.session = requests.session()
-        self.params = {
-            'headers': {
-                'User-Agent': 'Software Heritage PyPi Loader (%s)' % (
-                    __version__
-                )
-            }
-        }
-
-    def _save_response(self, response):
-        """Log the response from a server request to a cache dir.
-
-        Args:
-            response: full server response
-            cache_dir: system path for cache dir
-        Returns:
-            nothing
-        """
-        import gzip
-        from json import dumps
-        datepath = arrow.utcnow().isoformat()
-        fname = os.path.join(self.cache_dir, datepath + '.gz')
-        with gzip.open(fname, 'w') as f:
-            f.write(bytes(
-                dumps(response.json()),
-                'UTF-8'
-            ))
-
-    def info(self, project_url):
-        """Given a metadata project url, retrieve the raw json response
-
-        """
-        response = self.session.get(project_url, **self.params)
-        if response.status_code != 200:
-            raise ValueError('Fail to load origin %s' % self.origin_url)
-
-        if self.do_cache:
-            self._save_response(response)
-
-        return response.json()
-
-    def retrieve_releases(self, project, releases):
-        """Given a dictionary of releases, retrieve them locally.
-
-        """
-        # order the release in time order
-        _release_versions = list(releases.keys())
-        _release_versions.sort()
-
-        for version in _release_versions:
-            release = releases[version]
-            _release = release.copy()
-            logging.debug('Release version: %s' % version)
-            path = os.path.join(self.temp_directory, project, version)
-            os.makedirs(path, exist_ok=True)
-            filepath = os.path.join(path, release['filename'])
-            logging.debug('Release local path: %s' % filepath)
-
-            r = self.session.get(release['url'])
-            if not r.ok:
-                raise ValueError('Fail to retrieve release %s' % version)
-
-            # checks
-            _len = len(r.content)
-            if _len != release['size']:
-                raise ValueError('Error when checking size: %s != %s' % (
-                    release['size'], _len))
-
-            # checking digest and writing
-            h = hashlib.sha256()
-            with open(filepath, 'wb') as f:
-                for chunk in r.iter_content():
-                    h.update(chunk)
-                    f.write(chunk)
-
-            actual_digest = h.hexdigest()
-            if actual_digest != release['sha256']:
-                raise ValueError(
-                    'Error when checking the hash checksum: %s != %s' % (
-                        release['sha256'], actual_digest))
-
-            uncompress_path = os.path.join(path, 'uncompress')
-            os.makedirs(uncompress_path, exist_ok=True)
-
-            nature = tarball.uncompress(filepath, uncompress_path)
-            _release['directory'] = uncompress_path
-
-            artifact = convert_to_hex(hashutil.hash_path(filepath))
-            artifact['archive_type'] = nature
-            for key, value in artifact.items():
-                _release[key] = value
-
-            yield version, _release
-
-
 class PyPiLoader(SWHStatelessLoader):
     CONFIG_BASE_FILENAME = 'loader/pypi'
     ADDITIONAL_CONFIG = {
@@ -227,6 +83,8 @@ class PyPiLoader(SWHStatelessLoader):
         self.project_name = project_name
         self.origin_url = origin_url
         self.origin_metadata_url = origin_metadata_url
+        self.project = PyPiProject(self.pypi_client, self.project_name,
+                                   self.origin_metadata_url)
 
     def get_contents(self):
         return self._contents
@@ -252,12 +110,7 @@ class PyPiLoader(SWHStatelessLoader):
           revisions, releases, snapshot)
 
         """
-        project_info = self.pypi_client.info(self.origin_metadata_url)
-        project = PyPiProject(project_info)
-        releases = self.pypi_client.retrieve_releases(
-            self.project_name, project.releases())
-        info = project.info()
-        author = project.author()
+        pypi_releases = self.project.releases()
 
         _contents = []
         _directories = []
@@ -269,7 +122,11 @@ class PyPiLoader(SWHStatelessLoader):
 
         _last_rev = None
 
-        for version, release in releases:
+        for version, _release in pypi_releases:
+            info = self.project.info(version)
+            author = self.project.author(version)
+            logging.debug('author: %s' % author)
+            release = _release['release']
             _dir_path = release.pop('directory')
             _dir_path = _dir_path.encode('utf-8')
             directory = Directory.from_disk(path=_dir_path, data=True)
@@ -325,6 +182,11 @@ class PyPiLoader(SWHStatelessLoader):
         _snapshot['id'] = identifier_to_bytes(
             snapshot_identifier(_snapshot))
 
+        logging.debug('contents: %s' % len(_contents))
+        logging.debug('directories: %s' % len(_directories))
+        logging.debug('revisions: %s' % len(_revisions))
+        logging.debug('releases: %s' % len(_releases))
+
         self._contents = _contents
         self._directories = _directories
         self._revisions = _revisions
diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
index 89213cdbb061b4fd0fed56ff333f391bf20161ee..662ca5fdb4cfb36b219d4e8b072a11a5bf3344cf 100644
--- a/swh/loader/pypi/model.py
+++ b/swh/loader/pypi/model.py
@@ -4,49 +4,105 @@
 # See top-level LICENSE file for more information
 
 
-import logging
+def info(data):
+    """Given a raw project metadata dict (pypi json api), return the
+    interesting subset for the loader.
+
+    """
+    info = data['info']
+    # robustness: on real pypi data, 'project_urls' can be null or
+    # lack the 'Homepage' key; default the upstream to None instead
+    # of raising KeyError/TypeError
+    project_urls = info.get('project_urls') or {}
+    return {
+        'home_page': info['home_page'],
+        'description': info['description'],
+        'summary': info['summary'],
+        'license': info['license'],
+        'package_url': info['package_url'],
+        'project_url': info['project_url'],
+        'upstream': project_urls.get('Homepage'),
+    }
+
+
+def author(data):
+    """Given a raw project metadata dict, return an author subset with
+    fullname/name/email encoded as bytes.
+
+    """
+    # robustness: pypi can return null for author/author_email; treat
+    # those as empty strings so .encode() below cannot crash
+    name = data['info']['author'] or ''
+    email = data['info']['author_email'] or ''
+    if email:
+        fullname = '%s <%s>' % (name, email)
+    else:
+        fullname = name
+
+    return {
+        'fullname': fullname.encode('utf-8'),
+        'name': name.encode('utf-8'),
+        'email': email.encode('utf-8'),
+    }
 
 
 class PyPiProject:
     """PyPi project representation
 
+    This permits to extract information for the:
+    - project, either the latest information (from the last revision)
+    - project information for a given release
+    - same for author information
+
     """
-    def __init__(self, data):
-        self.data = data
-
-    def info(self):
-        return {
-            'home_page': self.data['info']['home_page'],
-            'description': self.data['info']['description'],
-            'summary': self.data['info']['summary'],
-            'license': self.data['info']['license'],
-            'package_url': self.data['info']['package_url'],
-            'project_url': self.data['info']['project_url'],
-            'upstream': self.data['info']['project_urls']['Homepage'],
-        }
+    def __init__(self, client, project, project_metadata_url, data=None):
+        self.client = client
+        self.project = project
+        self.project_metadata_url = project_metadata_url
+        if data:
+            self.data = data
+        else:
+            self.data = client.info(project_metadata_url)
 
-    def author(self):
-        name = self.data['info']['author'].encode('utf-8')
-        email = self.data['info']['author_email'].encode('utf-8')
-        return {
-            'fullname': name,
-            'name': name,
-            'email': email,
+        self.last_version = self.data['info']['version']
+        self.cache = {
+            self.last_version: self.data
         }
 
+    def _data(self, release_name=None):
+        if release_name:
+            data = self.cache.get(release_name)
+            if not data:
+                data = self.client.release(self.project, release_name)
+                self.cache[release_name] = data
+        else:
+            data = self.data
+        return data
+
+    def info(self, release_name=None):
+        return info(self._data(release_name))
+
+    def author(self, release_name=None):
+        return author(self._data(release_name))
+
+    def fetch_release(self, release_name=None):
+        pass
+
     def releases(self):
-        releases = {}
-        for version, release in self.data['releases'].items():
-            logging.debug('version: %s, release: %s' % (version, release))
+        # sort releases in ascending order
+        releases_dict = self.data['releases']
+        _releases = list(releases_dict.keys())
+        _releases.sort()
+        # Then compute information per release
+        for version in _releases:
+            release = releases_dict[version]
+            if version == self.last_version:  # avoid an extra query
+                release_info = self.info()
+            else:
+                release_info = self.info(release_name=version)
+
             # FIXME: there can be multiple 'package_type' here:
             # sdist, bdist_egg, bdist_wheel
             if isinstance(release, list):
                 if len(release) > 1:
-                    raise ValueError(  # unexpected so fail so that we
-                                       # can fix later
-                        'Unexpected list of more than 1 element, failing!')
-                release = release[0]
-            releases[version] = {
+                    raise ValueError(
+                        'Unsupported other formats for now, failing!')
+
+            release = release[0]
+            # flatten the metadata to ease reading
+            _flattenned_release = {
                 'name': version,
                 'message': release['comment_text'],
                 'sha256': release['digests']['sha256'],
@@ -55,4 +111,10 @@ class PyPiProject:
                 'url': release['url'],
                 'date': release['upload_time'],
             }
-        return releases
+            # fetch and write locally archives
+            _release = self.client.fetch_release(version, _flattenned_release)
+
+            yield version, {
+                'info': release_info,
+                'release': _release,
+            }
diff --git a/swh/loader/pypi/tests/test_model.py b/swh/loader/pypi/tests/test_model.py
index 4f143a0853d4a7ae1307515a744922120dc11ae2..5ce57593ab1489290ae54f92e16c200a2d5c95e7 100644
--- a/swh/loader/pypi/tests/test_model.py
+++ b/swh/loader/pypi/tests/test_model.py
@@ -11,12 +11,52 @@ from nose.tools import istest
 from swh.loader.pypi.model import PyPiProject
 
 
-class ModelTest(TestCase):
+class MockPyPiClient:
+    """Stub for client.PyPiClient: returns canned data so the model
+    tests avoid any network or disk access.
+
+    """
+    def release(self, project, release):
+        """Simulate the computation of the release object.
+
+        The production code will trigger a query to the pypi api.
+
+        NOTE(review): only release '0.1' is simulated here; any other
+        release name implicitly returns None.
+
+        """
+        if release == '0.1':
+            return {
+                'info': {
+                    'home_page': 'something',
+                    'description': 'awesome python package',
+                    'summary': 'awesome python package',
+                    'license': '',
+                    'package_url': '',
+                    'project_url': '',
+                    'project_urls': {
+                        'Homepage': ''
+                    },
+                },
+                'releases': {
+
+                },
+            }
+
+    def fetch_release(self, project, release):
+        """Avoid fetching and writing to disk, simply returns the release
+           object
+
+           The production code will trigger the raw archive fetch and
+           writes to temporary disk, we avoid this here.
+
+        """
+        return release
+
 
+class ModelTest(TestCase):
     def setUp(self):
         with open('./swh/loader/pypi/tests/test_model_data.json') as f:
             self.data = json.load(f)
-        self.project = PyPiProject(self.data)
+
+        self.project = PyPiProject(
+            client=MockPyPiClient(),
+            project='7xydothis',
+            project_metadata_url='https://pypi.org/pypi/7xydothis/json',
+            data=self.data)
 
     @istest
     def info(self):
@@ -41,7 +81,7 @@ class ModelTest(TestCase):
         name = self.data['info']['author'].encode('utf-8')
         email = self.data['info']['author_email'].encode('utf-8')
         expected_author = {
-            'fullname': name,
+            'fullname': b'%s <%s>' % (name, email),
             'name': name,
             'email': email,
         }
@@ -55,28 +95,43 @@ class ModelTest(TestCase):
         release0 = self.data['releases']['0.1'][0]
         release1 = self.data['releases']['0.1.1'][0]
         self.maxDiff = None
-        expected_releases = {
-            '0.1': {
-                'name': '0.1',
-                'message': release0['comment_text'],
-                'sha256': release0['digests']['sha256'],
-                'size': release0['size'],
-                'filename': release0['filename'],
-                'url': release0['url'],
-                'date': release0['upload_time'],
-            },
-            '0.1.1': {
-                'name': '0.1.1',
-                'message': release1['comment_text'],
-                'sha256': release1['digests']['sha256'],
-                'size': release1['size'],
-                'filename': release1['filename'],
-                'url': release1['url'],
-                'date': release1['upload_time'],
-            }
-        }
-
-        self.assertEqual(expected_releases, actual_releases)
+        release_011_info = self.project.info()
+        expected_releases = [
+            ('0.1', {
+                'info': {
+                    'home_page': 'something',
+                    'description': 'awesome python package',
+                    'summary': 'awesome python package',
+                    'license': '',
+                    'package_url': '',
+                    'project_url': '',
+                    'upstream': '',
+                },
+                'release': {
+                    'name': '0.1',
+                    'message': release0['comment_text'],
+                    'sha256': release0['digests']['sha256'],
+                    'size': release0['size'],
+                    'filename': release0['filename'],
+                    'url': release0['url'],
+                    'date': release0['upload_time'],
+                }
+            }),
+            ('0.1.1', {
+                'info': release_011_info,
+                'release': {
+                    'name': '0.1.1',
+                    'message': release1['comment_text'],
+                    'sha256': release1['digests']['sha256'],
+                    'size': release1['size'],
+                    'filename': release1['filename'],
+                    'url': release1['url'],
+                    'date': release1['upload_time'],
+                }
+            })
+        ]
+
+        self.assertEqual(expected_releases, list(actual_releases))
 
     @istest
     def releases_unexpected_release_format(self):
@@ -84,5 +139,5 @@ class ModelTest(TestCase):
         data['releases']['0.1'].append({'anything': 'really to break'})
 
         with self.assertRaisesRegex(ValueError,
-                                    'Unexpected list of more than 1'):
-            self.project.releases()
+                                    'Unsupported other formats for now'):
+            list(self.project.releases())