diff --git a/debian/control b/debian/control
index e841fc9d3bcb99dcad431ff5341bbf17b20e720f..8d85b304914c0cb8a6492252f6eecc1f97aa3a9c 100644
--- a/debian/control
+++ b/debian/control
@@ -5,7 +5,9 @@ Priority: optional
 Build-Depends: debhelper (>= 9),
                dh-python (>= 2),
                python3-all,
+               python3-arrow,
                python3-nose,
+               python3-requests,
                python3-setuptools,
                python3-swh.core,
                python3-swh.storage,
diff --git a/requirements.txt b/requirements.txt
index ae22f85f3a9ea9c2a5921256c2a89e795807522c..4691d161ea27d0a5bcb67791338dc1181187daed 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
 setuptools
 vcversioner
+requests
+arrow
diff --git a/setup.py b/setup.py
index 9232956f14394aa93bd2fe88cc3dd7e093b0b624..c1edd316dac45d0150293cf8c96b650e88b05f97 100755
--- a/setup.py
+++ b/setup.py
@@ -35,6 +35,6 @@ setup(
     install_requires=parse_requirements() + parse_requirements('swh'),
     test_requires=parse_requirements('test'),
     setup_requires=['vcversioner'],
-    vcversioner={},
+    vcversioner={'version_module_paths': ['swh/loader/pypi/_version.py']},
     include_package_data=True,
 )
diff --git a/swh/loader/__init__.py b/swh/loader/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..69e3be50dac40cddced86e9df2f9c2df5a75f004 100644
--- a/swh/loader/__init__.py
+++ b/swh/loader/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/swh/loader/pypi/loader.py b/swh/loader/pypi/loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..423c7ab33ce6be228451362caf0a99320d0464e0
--- /dev/null
+++ b/swh/loader/pypi/loader.py
@@ -0,0 +1,315 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import hashlib
+import logging
+import os
+import requests
+import shutil
+
+from swh.core import tarball
+from swh.loader.core.utils import clean_dangling_folders
+from swh.loader.core.loader import SWHStatelessLoader
+from swh.model import hashutil
+from swh.model.from_disk import Directory
+from swh.model.identifiers import (release_identifier, revision_identifier,
+                                   snapshot_identifier, identifier_to_bytes)
+
+from .model import PyPiProject
+
+try:
+    from swh.loader.pypi._version import __version__
+except ImportError:  # no generated _version.py module available
+    __version__ = 'devel'
+
+
+TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.pypi.'  # see pre_cleanup
+
+
+def convert_to_hex(d):
+    """Return a copy of a flat dictionary with its bytes values hex-encoded.
+
+    Values which are not bytes are copied over untouched. A falsy input
+    (None or an empty dictionary) is returned as is, without any copy.
+
+    Args:
+        d (dict): flat dictionary whose values may be hash bytes.
+
+    Returns:
+        dict: new dictionary with the same keys, where every bytes value
+        is replaced by its hexadecimal string (the others are kept as is).
+
+
+    """
+    if not d:
+        return d
+
+    return {
+        key: hashutil.hash_to_hex(value) if isinstance(value, bytes) else value
+        for key, value in d.items()
+    }
+
+
+class PyPiClient:
+    """PyPi client in charge of discussing with the pypi server.
+
+    Args:
+        temp_directory (str): where releases get downloaded/uncompressed;
+            a temporary directory is created when it is not provided
+        cache (bool): whether to save the metadata responses on disk
+        cache_dir (str): where to save them (only used if cache is set)
+
+    """
+    def __init__(self, temp_directory=None, cache=False, cache_dir=None):
+        self.version = __version__
+        if not temp_directory:
+            from tempfile import mkdtemp
+            self.temp_directory = mkdtemp(dir=temp_directory,
+                                          prefix='swh.loader.pypi.client')
+        else:
+            self.temp_directory = temp_directory
+
+        self.do_cache = cache
+        if self.do_cache:
+            self.cache_dir = cache_dir
+            os.makedirs(self.cache_dir, exist_ok=True)
+        self.session = requests.session()
+        user_agent = 'Software Heritage PyPi Loader (%s)' % __version__
+        self.params = {'headers': {'User-Agent': user_agent}}
+
+    def _save_response(self, response):
+        """Save a server response's json body, gzipped, to the cache dir."""
+        import gzip
+        from json import dumps
+        from arrow import utcnow
+        datepath = utcnow().isoformat()
+        fname = os.path.join(self.cache_dir, datepath + '.gz')
+        with gzip.open(fname, 'w') as f:
+            f.write(dumps(response.json()).encode('utf-8'))
+
+    def info(self, project_url):
+        """Given a metadata project url, retrieve the raw json response
+
+        Raises:
+            ValueError: if the server does not answer with a 200 status
+
+        """
+        response = self.session.get(project_url, **self.params)
+        if response.status_code != 200:
+            # the client has no origin_url, report the requested url
+            raise ValueError('Fail to load origin %s' % project_url)
+
+        if self.do_cache:
+            self._save_response(response)
+
+        return response.json()
+
+    def retrieve_releases(self, project, releases):
+        """Given a dictionary of releases, retrieve them locally.
+
+        Args:
+            project (str): project name (used as download subdirectory)
+            releases (dict): releases' information indexed by version
+
+        Returns:
+            dict: the releases completed with the uncompressed 'directory',
+            the archive's hashes (as hex) and its 'archive_type'
+
+        Raises:
+            ValueError: on download failure or size/checksum mismatch
+
+        """
+        _releases = releases.copy()
+        for version, release in releases.items():
+            logging.debug('version: %s' % version)
+            path = os.path.join(self.temp_directory, project, version)
+            os.makedirs(path, exist_ok=True)
+            filepath = os.path.join(path, release['filename'])
+            logging.debug('filepath to write: %s' % filepath)
+
+            r = self.session.get(release['url'], **self.params)
+            if not r.ok:
+                raise ValueError('Fail to retrieve release %s' % version)
+
+            content = r.content  # the whole body, loaded in memory
+            if len(content) != release['size']:
+                raise ValueError('Error when checking size: %s != %s' % (
+                    release['size'], len(content)))
+
+            # write and hash the body in one go, not byte per byte
+            with open(filepath, 'wb') as f:
+                f.write(content)
+
+            actual_digest = hashlib.sha256(content).hexdigest()
+            if actual_digest != release['sha256']:
+                raise ValueError(
+                    'Error when checking the hash checksum: %s != %s' % (
+                        release['sha256'], actual_digest))
+
+            uncompress_path = os.path.join(path, 'uncompress')
+            os.makedirs(uncompress_path, exist_ok=True)
+            nature = tarball.uncompress(filepath, uncompress_path)
+            _releases[version]['directory'] = uncompress_path
+
+            artifact = convert_to_hex(hashutil.hash_path(filepath))
+            artifact['archive_type'] = nature
+            _releases[version].update(artifact)
+
+        return _releases
+
+
+class PyPiLoader(SWHStatelessLoader):
+    """Loader turning the releases of a pypi project into swh objects."""
+    CONFIG_BASE_FILENAME = 'loader/pypi'
+    ADDITIONAL_CONFIG = {
+        'temp_directory': ('str', '/tmp/swh.loader.pypi/'),
+        'cache': ('bool', False),
+        'cache_dir': ('str', ''),
+        'debug': ('bool', False),  # NOT FOR PRODUCTION
+    }
+
+    def __init__(self):
+        super().__init__(logging_class='swh.loader.pypi.PyPiLoader')
+        self.origin_id = None
+        self.temp_directory = self.config['temp_directory']
+        self.pypi_client = PyPiClient(
+            temp_directory=self.temp_directory,
+            cache=self.config['cache'],
+            cache_dir=self.config['cache_dir'])
+        self.debug = self.config['debug']
+
+    def pre_cleanup(self):
+        """(override) To prevent disk explosion...
+
+        """
+        clean_dangling_folders(self.temp_directory,
+                               pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
+                               log=self.log)
+
+    def cleanup(self):
+        """(override) Clean up temporary disk use
+
+        """
+        if self.debug:
+            self.log.warning(
+                '** DEBUG MODE ** Will not clean up temp dir  %s',
+                self.temp_directory)
+            return
+        if os.path.exists(self.temp_directory):
+            self.log.debug('Clean up %s' % self.temp_directory)
+            shutil.rmtree(self.temp_directory)
+
+    def prepare_origin_visit(self, project_name, origin_url,
+                             origin_metadata_url=None):
+        """(override) Prepare the origin visit information
+
+        """
+        self.origin = {
+            'url': origin_url,
+            'type': 'pypi',
+        }
+        self.visit_date = None
+
+    def prepare(self, project_name, origin_url,
+                origin_metadata_url=None):
+        """(override) Keep reference to the origin url (project) and the
+           project metadata url
+
+        """
+        self.project_name = project_name
+        self.origin_url = origin_url
+        self.origin_metadata_url = origin_metadata_url
+
+    def get_contents(self):
+        return self.contents
+
+    def get_directories(self):
+        return self.directories
+
+    def get_revisions(self):
+        return self.revisions
+
+    def get_releases(self):
+        return self.releases
+
+    def get_snapshot(self):
+        return self.snapshot
+
+    def fetch_data(self):
+        """(override) Retrieve the pypi origin's information and compute the
+           contents, directories, revisions, releases and snapshot out of it
+
+        """
+        project_info = self.pypi_client.info(self.origin_metadata_url)
+        project = PyPiProject(project_info)
+        releases = self.pypi_client.retrieve_releases(
+            self.project_name, project.releases())
+        info = project.info()
+        author = project.author()
+
+        _contents = []
+        _directories = []
+        _revisions = []
+        _releases = []
+        _snapshot = {'branches': {}}
+
+        # one synthetic revision, release and snapshot branch per version
+        for version, release in releases.items():
+            _dir_path = release.pop('directory')
+            directory = Directory.from_disk(path=_dir_path.encode('utf-8'),
+                                            save_path=True)
+            _objects = directory.collect()
+
+            # accumulate the objects (append would nest one view per release)
+            _contents.extend(_objects['content'].values())
+            _directories.extend(_objects['directory'].values())
+
+            _revision = {
+                'synthetic': True,
+                'metadata': {
+                    'original_artifact': [release],
+                    'project': info,
+                },
+                'author': author,
+                'date': release['date'],
+                'committer': author,
+                'committer_date': release['date'],
+                'name': release['name'],
+                'message': release['message'],
+                'directory': directory.hash,
+                'parents': [],
+                'type': 'tar',
+            }
+            _revision['id'] = identifier_to_bytes(
+                revision_identifier(_revision))
+            _revisions.append(_revision)
+
+            _release = {
+                'name': release['name'],
+                'author': author,
+                'date': release['date'],
+                'message': release['message'],
+                'target_type': 'revision',
+                'target': _revision['id'],
+            }
+            _release['id'] = identifier_to_bytes(release_identifier(_release))
+            _releases.append(_release)
+
+            _snapshot['branches'][release['name']] = {
+                'target': _release['id'],
+                'target_type': 'release',
+            }
+
+            logging.debug('version: %s' % version)
+            # 'directory' was popped from the release above
+            logging.debug('release: %s' % _dir_path)
+
+        _snapshot['id'] = identifier_to_bytes(snapshot_identifier(_snapshot))
+
+        self.contents = _contents
+        self.directories = _directories
+        self.revisions = _revisions
+        self.releases = _releases
+        self.snapshot = _snapshot
diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..365255c6aaf230b24ff61d4917bbab7585916072
--- /dev/null
+++ b/swh/loader/pypi/model.py
@@ -0,0 +1,50 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+class PyPiProject:
+    """PyPi project representation
+
+    """
+    def __init__(self, data):
+        self.data = data
+
+    def info(self):
+        """Return the project's main metadata ('upstream' may be None)."""
+        info = self.data['info']
+        keys = ('home_page', 'description', 'summary', 'license',
+                'package_url', 'project_url')
+        # tolerate a null project_urls or a missing Homepage entry
+        homepage = (info.get('project_urls') or {}).get('Homepage')
+        return dict({key: info[key] for key in keys}, upstream=homepage)
+
+    def author(self):
+        """Return the project's author as fullname, name and email."""
+        info = self.data['info']
+        return {'fullname': info['author'], 'name': info['author'],
+                'email': info['author_email']}
+
+    def releases(self):
+        """Return the releases indexed by version (empty ones skipped)."""
+        releases = {}
+        for version, release in self.data['releases'].items():
+            if isinstance(release, list):
+                if not release:  # no file to retrieve for that version
+                    continue
+                if len(release) > 1:
+                    # unexpected so fail (ValueError) so that we can fix later
+                    raise ValueError(
+                        'Unexpected list of more than 1 element, failing!')
+                release = release[0]
+            releases[version] = {
+                'name': version,
+                'message': release['comment_text'],
+                'sha256': release['digests']['sha256'],
+                'size': release['size'],
+                'filename': release['filename'],
+                'url': release['url'],
+                'date': release['upload_time'],
+            }
+        return releases
diff --git a/swh/loader/pypi/tasks.py b/swh/loader/pypi/tasks.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0792f199f411cc6a58b2ab04756b574e58d2212
--- /dev/null
+++ b/swh/loader/pypi/tasks.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.task import Task
+
+from .loader import PyPiLoader
+
+
+class LoadPyPiTsk(Task):
+    """Scheduler task loading a pypi project through the PyPiLoader."""
+    task_queue = 'swh_loader_pypi'
+
+    def run_task(self, project_name, project_url, project_metadata_url=None):
+        pypi_loader = PyPiLoader()
+        pypi_loader.log = self.log  # log through the task's logger
+        return pypi_loader.load(project_name, project_url,
+                                origin_metadata_url=project_metadata_url)
diff --git a/swh/loader/pypi/tests/test_model.py b/swh/loader/pypi/tests/test_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e2444eb2138fa42590031077d0c77c1a0b33414
--- /dev/null
+++ b/swh/loader/pypi/tests/test_model.py
@@ -0,0 +1,86 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+
+from unittest import TestCase
+from nose.tools import istest
+
+from swh.loader.pypi.model import PyPiProject
+
+
+class ModelTest(TestCase):
+    """Check PyPiProject against a sample of pypi's json metadata.
+
+    """
+
+    def setUp(self):
+        # Resolve the sample next to this module, whatever the cwd is.
+        from os import path
+        data_path = path.join(path.dirname(__file__), 'test_model_data.json')
+        with open(data_path) as f:
+            self.data = json.load(f)
+        self.project = PyPiProject(self.data)
+
+    @istest
+    def info(self):
+        actual_info = self.project.info()
+
+        expected_info = {
+            'home_page': self.data['info']['home_page'],
+            'description': self.data['info']['description'],
+            'summary': self.data['info']['summary'],
+            'license': self.data['info']['license'],
+            'package_url': self.data['info']['package_url'],
+            'project_url': self.data['info']['project_url'],
+            'upstream': self.data['info']['project_urls']['Homepage'],
+        }
+
+        self.assertEqual(expected_info, actual_info)
+
+    @istest
+    def author(self):
+        actual_author = self.project.author()
+
+        expected_author = {
+            'fullname': self.data['info']['author'],
+            'name': self.data['info']['author'],
+            'email': self.data['info']['author_email'],
+        }
+
+        self.assertEqual(expected_author, actual_author)
+
+    @istest
+    def releases(self):
+        actual_releases = self.project.releases()
+
+        # show the whole diff on failure
+        self.maxDiff = None
+        expected_releases = {}
+        for version in ('0.1', '0.1.1'):
+            release = self.data['releases'][version][0]
+            expected_releases[version] = {
+                'name': version,
+                'message': release['comment_text'],
+                'sha256': release['digests']['sha256'],
+                'size': release['size'],
+                'filename': release['filename'],
+                'url': release['url'],
+                'date': release['upload_time'],
+            }
+
+        self.assertEqual(expected_releases, actual_releases)
+
+    @istest
+    def releases_unexpected_release_format(self):
+        from copy import deepcopy
+        # Work on a private copy so that the sample data stays untouched.
+        data = deepcopy(self.data)
+        data['releases']['0.1'].append({'anything': 'really to break'})
+        project = PyPiProject(data)
+
+        with self.assertRaisesRegex(ValueError,
+                                    'Unexpected list of more than 1'):
+            project.releases()
diff --git a/swh/loader/pypi/tests/test_model_data.json b/swh/loader/pypi/tests/test_model_data.json
new file mode 100644
index 0000000000000000000000000000000000000000..17a02e57802db1c13c8768dca4cd17dc2a81b74f
--- /dev/null
+++ b/swh/loader/pypi/tests/test_model_data.json
@@ -0,0 +1,95 @@
+{
+  "info": {
+    "author": "bernardfrk",
+    "author_email": "bernard.frk@gmail.com",
+    "bugtrack_url": null,
+    "classifiers": [],
+    "description": "Utitilies to use the 7xydothis APIs",
+    "description_content_type": null,
+    "docs_url": null,
+    "download_url": "UNKNOWN",
+    "downloads": {
+      "last_day": -1,
+      "last_month": -1,
+      "last_week": -1
+    },
+    "home_page": "https://github.com/frkb/7xydothis",
+    "keywords": null,
+    "license": "UNKNOWN",
+    "maintainer": null,
+    "maintainer_email": null,
+    "name": "7xydothis",
+    "package_url": "https://pypi.org/project/7xydothis/",
+    "platform": "UNKNOWN",
+    "project_url": "https://pypi.org/project/7xydothis/",
+    "project_urls": {
+      "Download": "UNKNOWN",
+      "Homepage": "https://github.com/frkb/7xydothis"
+    },
+    "release_url": "https://pypi.org/project/7xydothis/0.1.1/",
+    "requires_dist": null,
+    "requires_python": null,
+    "summary": "Utitilies to use the 7xydothis APIs",
+    "version": "0.1.1"
+  },
+  "last_serial": 2668125,
+  "releases": {
+    "0.1": [
+      {
+        "comment_text": "",
+        "digests": {
+          "md5": "578e4bde98db732109d0698aba168a06",
+          "sha256": "7e6f59be532d43ac0ad32da6a068417f0973285a38a08f3f5056f79770f2f973"
+        },
+        "downloads": -1,
+        "filename": "7xydothis-0.1.tar.gz",
+        "has_sig": false,
+        "md5_digest": "578e4bde98db732109d0698aba168a06",
+        "packagetype": "sdist",
+        "python_version": "source",
+        "requires_python": null,
+        "size": 868,
+        "upload_time": "2017-02-25T21:31:02",
+        "url": "https://files.pythonhosted.org/packages/68/55/6a00e46a1a10e7a0731e50cbcc9f6243c5112eeda8326d781a03a1254105/7xydothis-0.1.tar.gz"
+      }
+    ],
+    "0.1.1": [
+      {
+        "comment_text": "",
+        "digests": {
+          "md5": "75fe55b933330adbde027b6edc74863d",
+          "sha256": "76d243b70a10d51ea87312a97a7d7b1a525984fd56d1c5f41650a1fa0fde1bc1"
+        },
+        "downloads": -1,
+        "filename": "7xydothis-0.1.1.tar.gz",
+        "has_sig": false,
+        "md5_digest": "75fe55b933330adbde027b6edc74863d",
+        "packagetype": "sdist",
+        "python_version": "source",
+        "requires_python": null,
+        "size": 875,
+        "upload_time": "2017-02-25T21:41:37",
+        "url": "https://files.pythonhosted.org/packages/96/64/6fd8e189aa97820b306f06dbce02d618bf155379575c553db3d2c2eda045/7xydothis-0.1.1.tar.gz"
+      }
+    ]
+  },
+  "urls": [
+    {
+      "comment_text": "",
+      "digests": {
+        "md5": "75fe55b933330adbde027b6edc74863d",
+        "sha256": "76d243b70a10d51ea87312a97a7d7b1a525984fd56d1c5f41650a1fa0fde1bc1"
+      },
+      "downloads": -1,
+      "filename": "7xydothis-0.1.1.tar.gz",
+      "has_sig": false,
+      "md5_digest": "75fe55b933330adbde027b6edc74863d",
+      "packagetype": "sdist",
+      "python_version": "source",
+      "requires_python": null,
+      "size": 875,
+      "upload_time": "2017-02-25T21:41:37",
+      "url": "https://files.pythonhosted.org/packages/96/64/6fd8e189aa97820b306f06dbce02d618bf155379575c553db3d2c2eda045/7xydothis-0.1.1.tar.gz"
+    }
+  ]
+}