diff --git a/PKG-INFO b/PKG-INFO
index 5686739b8832f9f424031f4794cd236ad8a0b108..8cf842c30faf2540ab4ef86436e86fcc83e45d11 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: swh.model
-Version: 0.0.17
+Version: 0.0.18
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..58a761ead8c3be489f0e4738fac8d2173656ea7f
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1,3 @@
+_build/
+apidoc/
+*-stamp
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..c30c50ab01ec91da18f0718b7cfd052f046c2e44
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1 @@
+include ../../swh-docs/Makefile.sphinx
diff --git a/docs/_static/.placeholder b/docs/_static/.placeholder
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/docs/_templates/.placeholder b/docs/_templates/.placeholder
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..190deb7e5e29a032ce73bb31f168fe12df300d0d
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1 @@
+from swh.docs.sphinx.conf import *  # NoQA
diff --git a/docs/data-model.rst b/docs/data-model.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f365f9f258de61421735fefce9b21747a0209fd4
--- /dev/null
+++ b/docs/data-model.rst
@@ -0,0 +1,13 @@
+.. _data-model:
+
+Software Heritage data model
+============================
+
+TODO
+
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
+consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
+cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
+proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..22eccfdcec5757904f4fb768d85c504a96acaa69
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,20 @@
+Software Heritage - Development Documentation
+=============================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+
+Overview
+--------
+
+* :ref:`data-model`
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/setup.py b/setup.py
index 793b8e7d10393f030b9d4cc07ded3f78ed176976..8d2e843cc2269322fcae0c9248b0399c180de910 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-import sys
+import hashlib
 
 from setuptools import setup
 
@@ -16,8 +16,11 @@ def parse_requirements():
 
 
 extra_requirements = []
-if sys.version_info < (3, 5):
-    extra_requirements = ['pyblake2']
+
+
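+# Depend on the pyblake2 backport when hashlib cannot provide these blake2
+# variants (registered by swh/model/hashutil.py).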
+pyblake2_hashes = {'blake2s256', 'blake2b512'}
+if pyblake2_hashes - set(hashlib.algorithms_available):
+    extra_requirements.append('pyblake2')
 
 setup(
     name='swh.model',
diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO
index 5686739b8832f9f424031f4794cd236ad8a0b108..8cf842c30faf2540ab4ef86436e86fcc83e45d11 100644
--- a/swh.model.egg-info/PKG-INFO
+++ b/swh.model.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: swh.model
-Version: 0.0.17
+Version: 0.0.18
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt
index 79270515293f77238b836ae264f16b4ddd404f12..f88b36d8d8986741d8a5ba3395af1e497fdaa1c1 100644
--- a/swh.model.egg-info/SOURCES.txt
+++ b/swh.model.egg-info/SOURCES.txt
@@ -17,6 +17,13 @@ debian/control
 debian/copyright
 debian/rules
 debian/source/format
+docs/.gitignore
+docs/Makefile
+docs/conf.py
+docs/data-model.rst
+docs/index.rst
+docs/_static/.placeholder
+docs/_templates/.placeholder
 swh.model.egg-info/PKG-INFO
 swh.model.egg-info/SOURCES.txt
 swh.model.egg-info/dependency_links.txt
@@ -24,19 +31,21 @@ swh.model.egg-info/requires.txt
 swh.model.egg-info/top_level.txt
 swh/model/__init__.py
 swh/model/exceptions.py
-swh/model/git.py
+swh/model/from_disk.py
 swh/model/hashutil.py
 swh/model/identifiers.py
+swh/model/merkle.py
 swh/model/validators.py
 swh/model/fields/__init__.py
 swh/model/fields/compound.py
 swh/model/fields/hashes.py
 swh/model/fields/simple.py
 swh/model/tests/__init__.py
-swh/model/tests/test_git.py
-swh/model/tests/test_git_slow.py
+swh/model/tests/generate_testdata_from_disk.py
+swh/model/tests/test_from_disk.py
 swh/model/tests/test_hashutil.py
 swh/model/tests/test_identifiers.py
+swh/model/tests/test_merkle.py
 swh/model/tests/test_validators.py
 swh/model/tests/fields/__init__.py
 swh/model/tests/fields/test_compound.py
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9f37290cbf91aaf177b366496d84cda4ead46f0
--- /dev/null
+++ b/swh/model/from_disk.py
@@ -0,0 +1,346 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import enum
+import os
+import stat
+
+from . import hashutil
+from .merkle import MerkleLeaf, MerkleNode
+from .identifiers import (
+    directory_identifier,
+    identifier_to_bytes as id_to_bytes,
+    identifier_to_str as id_to_str,
+)
+
+
+class DentryPerms(enum.IntEnum):
+    """Admissible permissions for directory entries."""
+    content = 0o100644
+    """Content"""
+    executable_content = 0o100755
+    """Executable content (e.g. executable script)"""
+    symlink = 0o120000
+    """Symbolic link"""
+    directory = 0o040000
+    """Directory"""
+    revision = 0o160000
+    """Revision (e.g. submodule)"""
+
+
+def mode_to_perms(mode):
+    """Convert a file mode to a permission compatible with Software Heritage
+    directory entries.
+
+    Args:
+      mode (int): a file mode as returned by :func:`os.stat` in
+                  :attr:`os.stat_result.st_mode`
+
+    Returns:
+      DentryPerms: one of the following values:
+
+        - :const:`DentryPerms.content`: plain file
+        - :const:`DentryPerms.executable_content`: executable file
+        - :const:`DentryPerms.symlink`: symbolic link
+        - :const:`DentryPerms.directory`: directory
+
+    """
+    if stat.S_ISLNK(mode):
+        return DentryPerms.symlink
+    if stat.S_ISDIR(mode):
+        return DentryPerms.directory
+    if mode & 0o111:
+        # the file is executable in some way
+        return DentryPerms.executable_content
+    return DentryPerms.content
+
+
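+# Quick sanity check for mode_to_perms (illustrative):
+#
+#   >>> mode_to_perms(0o100755) is DentryPerms.executable_content
+#   True
+#   >>> mode_to_perms(0o040755) is DentryPerms.directory
+#   True
+
+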
+class Content(MerkleLeaf):
+    """Representation of a Software Heritage content as a node in a Merkle tree.
+
+    The current Merkle hash for the Content nodes is the `sha1_git`, which
+    makes it consistent with what :class:`Directory` uses for its own hash
+    computation.
+
+    """
+    __slots__ = []
+    type = 'content'
+
+    @classmethod
+    def from_bytes(cls, *, mode, data):
+        """Convert data (raw :class:`bytes`) to a Software Heritage content entry
+
+        Args:
+          mode (int): a file mode (passed to :func:`mode_to_perms`)
+          data (bytes): raw contents of the file
+        """
+        ret = hashutil.hash_data(data)
+        ret['length'] = len(data)
+        ret['perms'] = mode_to_perms(mode)
+        ret['data'] = data
+
+        return cls(ret)
+
+    @classmethod
+    def from_symlink(cls, *, path, mode):
+        """Convert a symbolic link to a Software Heritage content entry"""
+        return cls.from_bytes(mode=mode, data=os.readlink(path))
+
+    @classmethod
+    def from_file(cls, *, path, data=False, save_path=False):
+        """Compute the Software Heritage content entry corresponding to an on-disk
+        file.
+
+        The data of the returned content entry contains keys useful both for:
+
+        - loading the content in the archive (hashes, `length`)
+        - using the content as a directory entry in a directory
+
+        Args:
+          path (bytes): path to the file for which we're computing the
+            content entry
+          data (bool): add the file data to the entry
+          save_path (bool): add the file path to the entry
+        """
+        file_stat = os.lstat(path)
+        mode = file_stat.st_mode
+
+        if stat.S_ISLNK(mode):
+            # Symbolic link: return a file whose contents are the link target
+            return cls.from_symlink(path=path, mode=mode)
+        elif not stat.S_ISREG(mode):
+            # not a regular file: return the empty file instead
+            return cls.from_bytes(mode=mode, data=b'')
+
+        length = file_stat.st_size
+
+        if not data:
+            ret = hashutil.hash_path(path)
+        else:
+            chunks = []
+
+            def append_chunk(x, chunks=chunks):
+                chunks.append(x)
+
+            with open(path, 'rb') as fobj:
+                ret = hashutil.hash_file(fobj, length=length,
+                                         chunk_cb=append_chunk)
+
+            ret['data'] = b''.join(chunks)
+
+        if save_path:
+            ret['path'] = path
+        ret['perms'] = mode_to_perms(mode)
+        ret['length'] = length
+
+        obj = cls(ret)
+        return obj
+
+    def __repr__(self):
+        return 'Content(id=%s)' % id_to_str(self.hash)
+
+    def compute_hash(self):
+        return self.data['sha1_git']
+
+
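+# Illustrative use (hypothetical path): build a content entry carrying its
+# data; the node's Merkle hash is its sha1_git.
+#
+#   >>> c = Content.from_file(path=b'/etc/hostname', data=True)
+#   >>> c.hash == c.data['sha1_git']
+#   True
+
+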
+def accept_all_directories(dirname, entries):
+    """Default filter for :func:`Directory.from_disk` accepting all
+    directories
+
+    Args:
+      dirname (bytes): directory name
+      entries (list): directory entries
+    """
+    return True
+
+
+def ignore_empty_directories(dirname, entries):
+    """Filter for :func:`Directory.from_disk` ignoring empty directories
+
+    Args:
+      dirname (bytes): directory name
+      entries (list): directory entries
+
+    Returns:
+      True if the directory is not empty, False if the directory is empty
+    """
+    return bool(entries)
+
+
+def ignore_named_directories(names, *, case_sensitive=True):
+    """Filter for :func:`Directory.from_disk` ignoring directories whose name
+    is one of `names`.
+
+    Args:
+      names (list of bytes): names to ignore
+      case_sensitive (bool): whether to do the filtering in a case sensitive
+        way
+
+    Returns:
+      a directory filter for :func:`Directory.from_disk`
+    """
+    if not case_sensitive:
+        names = [name.lower() for name in names]
+
+    def named_filter(dirname, entries,
+                     names=names, case_sensitive=case_sensitive):
+        if case_sensitive:
+            return dirname not in names
+        else:
+            return dirname.lower() not in names
+
+    return named_filter
+
+
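+# Illustrative use: a filter that skips VCS bookkeeping directories.
+#
+#   >>> vcs_filter = ignore_named_directories([b'.git', b'.svn', b'.hg'])
+#   >>> vcs_filter(b'.git', [])
+#   False
+#   >>> vcs_filter(b'src', [])
+#   True
+
+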
+class Directory(MerkleNode):
+    """Representation of a Software Heritage directory as a node in a Merkle Tree.
+
+    This class can be used to generate, from an on-disk directory, all the
+    objects that need to be sent to the Software Heritage archive.
+
+    The :func:`from_disk` constructor allows you to generate the data structure
+    from a directory on disk. The resulting :class:`Directory` can then be
+    manipulated as a dictionary, using the path as key.
+
+    The :func:`collect` method is used to retrieve all the objects that need to
+    be added to the Software Heritage archive since the last collection, by
+    class (contents and directories).
+
+    When using the dict-like methods to update the contents of the directory,
+    the hashes of the affected levels of the hierarchy are invalidated and the
+    nodes can be collected again with :func:`collect`. This enables the
+    efficient collection of updated nodes, for instance when the client is
+    applying diffs.
+    """
+    __slots__ = ['__entries']
+    type = 'directory'
+
+    @classmethod
+    def from_disk(cls, *, path, data=False, save_path=False,
+                  dir_filter=accept_all_directories):
+        """Compute the Software Heritage objects for a given directory tree
+
+        Args:
+          path (bytes): the directory to traverse
+          data (bool): whether to add the data to the content objects
+          save_path (bool): whether to add the path to the content objects
+          dir_filter (function): a filter to ignore some directories by
+            name or contents. Takes two arguments: dirname and entries, and
+            returns True if the directory should be added, False if the
+            directory should be ignored.
+        """
+
+        top_path = path
+        dirs = {}
+
+        for root, dentries, fentries in os.walk(top_path, topdown=False):
+            entries = {}
+            # Process fentries and dentries together, as symbolic links to
+            # directories appear in dentries
+            for name in fentries + dentries:
+                path = os.path.join(root, name)
+                if not os.path.isdir(path) or os.path.islink(path):
+                    content = Content.from_file(path=path, data=data,
+                                                save_path=save_path)
+                    entries[name] = content
+                else:
+                    if dir_filter(name, dirs[path].entries):
+                        entries[name] = dirs[path]
+
+            dirs[root] = cls({'name': os.path.basename(root)})
+            dirs[root].update(entries)
+
+        return dirs[top_path]
+
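+    # Sketch of the intended workflow (path is hypothetical):
+    #
+    #   d = Directory.from_disk(path=b'/srv/src/project')
+    #   objects = d.collect()   # {'content': {...}, 'directory': {...}}
+    #   d.reset_collect()       # allow collecting the same nodes again
+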
+    def __init__(self, data=None):
+        super().__init__(data=data)
+        self.__entries = None
+
+    def invalidate_hash(self):
+        self.__entries = None
+        super().invalidate_hash()
+
+    @staticmethod
+    def child_to_directory_entry(name, child):
+        if isinstance(child, Directory):
+            return {
+                'type': 'dir',
+                'perms': DentryPerms.directory,
+                'target': child.hash,
+                'name': name,
+            }
+        elif isinstance(child, Content):
+            return {
+                'type': 'file',
+                'perms': child.data['perms'],
+                'target': child.hash,
+                'name': name,
+            }
+        else:
+            raise ValueError('unknown child')
+
+    def get_data(self, **kwargs):
+        return {
+            'id': self.hash,
+            'entries': self.entries,
+        }
+
+    @property
+    def entries(self):
+        if self.__entries is None:
+            self.__entries = [
+                self.child_to_directory_entry(name, child)
+                for name, child in self.items()
+            ]
+
+        return self.__entries
+
+    def compute_hash(self):
+        return id_to_bytes(directory_identifier({'entries': self.entries}))
+
+    def __getitem__(self, key):
+        if not isinstance(key, bytes):
+            raise ValueError('Can only get a bytes from Directory')
+
+        # Convenience shortcut
+        if key == b'':
+            return self
+
+        if b'/' not in key:
+            return super().__getitem__(key)
+        else:
+            key1, key2 = key.split(b'/', 1)
+            return self.__getitem__(key1)[key2]
+
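+    # (Illustrative) Nested access splits the key on b'/':
+    # dir_node[b'src/main.py'] is equivalent to dir_node[b'src'][b'main.py']
+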
+    def __setitem__(self, key, value):
+        if not isinstance(key, bytes):
+            raise ValueError('Can only set a bytes Directory entry')
+        if not isinstance(value, (Content, Directory)):
+            raise ValueError('Can only set a Directory entry to a Content or '
+                             'Directory')
+
+        if key == b'':
+            raise ValueError('Directory entry must have a name')
+        if b'\x00' in key:
+            raise ValueError('Directory entry name must not contain nul bytes')
+
+        if b'/' not in key:
+            return super().__setitem__(key, value)
+        else:
+            key1, key2 = key.rsplit(b'/', 1)
+            self[key1].__setitem__(key2, value)
+
+    def __delitem__(self, key):
+        if not isinstance(key, bytes):
+            raise ValueError('Can only delete a bytes Directory entry')
+
+        if b'/' not in key:
+            super().__delitem__(key)
+        else:
+            key1, key2 = key.rsplit(b'/', 1)
+            del self[key1][key2]
+
+    def __repr__(self):
+        return 'Directory(id=%s, entries=[%s])' % (
+            id_to_str(self.hash),
+            ', '.join(str(entry) for entry in self),
+        )
diff --git a/swh/model/git.py b/swh/model/git.py
deleted file mode 100644
index ad5962f543318eb0b3435ea2889133f9e8fad8ed..0000000000000000000000000000000000000000
--- a/swh/model/git.py
+++ /dev/null
@@ -1,587 +0,0 @@
-# Copyright (C) 2015-2017  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-
-import os
-import stat
-
-from enum import Enum, IntEnum
-
-from swh.model import hashutil, identifiers
-
-
-ROOT_TREE_KEY = b''
-
-
-class GitType(Enum):
-    BLOB = b'blob'
-    TREE = b'tree'
-    EXEC = b'exec'
-    LINK = b'link'
-    COMM = b'commit'
-    RELE = b'release'
-    REFS = b'ref'
-
-
-class GitPerm(IntEnum):
-    BLOB = 0o100644
-    TREE = 0o040000
-    EXEC = 0o100755
-    LINK = 0o120000
-
-
-def _compute_directory_git_sha1(hashes):
-    """Compute a directory git sha1 from hashes.
-
-    Args:
-        hashes: list of tree entries with keys:
-            - sha1_git: the tree entry's sha1
-            - name: file or subdir's name
-            - perms: the tree entry's sha1 permissions
-
-        Returns:
-            the binary sha1 of the dictionary's identifier
-
-        Assumes:
-            Every path exists in hashes.
-
-    """
-    directory = {
-        'entries':
-        [
-            {
-                'name': entry['name'],
-                'perms': entry['perms'],
-                'target': entry['sha1_git'],
-                'type': 'dir' if entry['perms'] == GitPerm.TREE else 'file',
-            }
-            for entry in hashes
-        ]
-    }
-    return hashutil.hash_to_bytes(identifiers.directory_identifier(directory))
-
-
-def compute_directory_git_sha1(dirpath, hashes):
-    """Compute a directory git sha1 for a dirpath.
-
-    Args:
-        dirpath: the directory's absolute path
-        hashes: list of tree entries with keys:
-            - sha1_git: the tree entry's sha1
-            - name: file or subdir's name
-            - perms: the tree entry's sha1 permissions
-
-        Returns:
-            the binary sha1 of the dictionary's identifier
-
-        Assumes:
-            Every path exists in hashes.
-
-    """
-    return _compute_directory_git_sha1(hashes[dirpath])
-
-
-def compute_revision_sha1_git(revision):
-    """Compute a revision sha1 git from its dict representation.
-
-    Args:
-        revision: Additional dictionary information needed to compute a
-        synthetic
-        revision. Following keys are expected:
-            - author
-            - date
-            - committer
-            - committer_date
-            - message
-            - type
-            - directory: binary form of the tree hash
-
-    Returns:
-        revision sha1 in bytes
-
-    # FIXME: beware, bytes output from storage api
-
-    """
-    return hashutil.hash_to_bytes(identifiers.revision_identifier(revision))
-
-
-def compute_release_sha1_git(release):
-    """Compute a release sha1 git from its dict representation.
-
-    Args:
-        release: Additional dictionary information needed to compute a
-        synthetic release. Following keys are expected:
-            - name
-            - message
-            - date
-            - author
-            - revision: binary form of the sha1_git revision targeted by this
-
-    Returns:
-        release sha1 in bytes
-
-    """
-    return hashutil.hash_to_bytes(identifiers.release_identifier(release))
-
-
-def compute_link_metadata(linkpath):
-    """Given a linkpath, compute the git metadata.
-
-    Args:
-        linkpath: absolute pathname of the link
-
-    Returns:
-        Dictionary of values:
-            - data: link's content
-            - length: link's content length
-            - name: basename of the link
-            - perms: git permission for link
-            - type: git type for link
-            - path: absolute path to the link on filesystem
-
-    """
-    data = os.readlink(linkpath)
-    link_metadata = hashutil.hash_data(data)
-    link_metadata.update({
-        'data': data,
-        'length': len(data),
-        'name': os.path.basename(linkpath),
-        'perms': GitPerm.LINK,
-        'type': GitType.BLOB,
-        'path': linkpath
-    })
-
-    return link_metadata
-
-
-def compute_blob_metadata(filepath):
-    """Given a filepath resolving to a regular file, compute the metadata.
-    Other file types (fifo, character or block device, symlink) will
-    be considered empty regular file.  To deal properly with symlinks,
-    use swh.model.git.compute_link_metadata.
-
-    Args:
-        filepath: absolute pathname of the regular file.
-
-    Returns:
-        Dictionary of values:
-            - name: basename of the file
-            - length: data length
-            - perms: git permission for file
-            - type: git type for file
-            - path: absolute filepath on filesystem
-
-    """
-    mode = os.lstat(filepath).st_mode
-    if not stat.S_ISREG(mode):  # special (block or character device, fifo)
-        perms = GitPerm.BLOB
-        blob_metadata = hashutil.hash_data(b'')
-        blob_metadata['length'] = 0
-    else:
-        perms = GitPerm.EXEC if os.access(filepath, os.X_OK) else GitPerm.BLOB
-        blob_metadata = hashutil.hash_path(filepath)
-
-    blob_metadata.update({
-        'name': os.path.basename(filepath),
-        'perms': perms,
-        'type': GitType.BLOB,
-        'path': filepath
-    })
-
-    return blob_metadata
-
-
-def _compute_tree_metadata(dirname, hashes):
-    """Given a dirname, compute the git metadata.
-
-    Args:
-        dirname: absolute pathname of the directory.
-        hashes: list of tree dirname's entries with keys:
-            - sha1_git: the tree entry's sha1
-            - name: file or subdir's name
-            - perms: the tree entry's sha1 permissions
-
-    Returns:
-        Dictionary of values:
-            - sha1_git: tree's sha1 git
-            - name: basename of the directory
-            - perms: git permission for directory
-            - type: git type for directory
-            - path: absolute path to directory on filesystem
-
-    """
-    return {
-        'sha1_git': _compute_directory_git_sha1(hashes),
-        'name': os.path.basename(dirname),
-        'perms': GitPerm.TREE,
-        'type': GitType.TREE,
-        'path': dirname
-    }
-
-
-def compute_tree_metadata(dirname, ls_hashes):
-    """Given a dirname, compute the git metadata.
-
-    Args:
-        dirname: absolute pathname of the directory.
-        ls_hashes: dictionary of path, hashes
-
-    Returns:
-        Dictionary of values:
-            - sha1_git: tree's sha1 git
-            - name: basename of the directory
-            - perms: git permission for directory
-            - type: git type for directory
-            - path: absolute path to directory on filesystem
-
-    """
-    return _compute_tree_metadata(dirname, ls_hashes[dirname])
-
-
-def default_validation_dir(dirpath):
-    """Default validation function.
-       This is the equivalent of the identity function.
-
-    Args:
-        dirpath: Path to validate
-
-    Returns: True
-
-    """
-    return True
-
-
-def _walk(rootdir,
-          dir_ok_fn=default_validation_dir,
-          remove_empty_folder=False):
-    """Walk the filesystem and yields a 3 tuples (dirpath, dirnames as set
-    of absolute paths, filenames as set of abslute paths)
-
-       Ignore files which won't pass the dir_ok_fn validation.
-
-       If remove_empty_folder is True, remove and ignore any
-       encountered empty folder.
-
-    Args:
-        - rootdir: starting walk root directory path
-        - dir_ok_fn: validation function. if folder encountered are
-        not ok, they are ignored.  Default to default_validation_dir
-        which does nothing.
-         - remove_empty_folder: Flag to remove and ignore any
-          encountered empty folders.
-
-    Yields:
-        3 tuples dirpath, set of absolute children dirname paths, set
-        of absolute filename paths.
-
-    """
-    def basic_gen_dir(rootdir):
-        for dp, dns, fns in os.walk(rootdir, topdown=False):
-            yield (dp,
-                   set((os.path.join(dp, dn) for dn in dns)),
-                   set((os.path.join(dp, fn) for fn in fns)))
-
-    if dir_ok_fn == default_validation_dir:
-        if not remove_empty_folder:  # os.walk
-            yield from basic_gen_dir(rootdir)
-        else:                        # os.walk + empty dir cleanup
-            empty_folders = set()
-            for dp, dns, fns in basic_gen_dir(rootdir):
-                if not dns and not fns:
-                    empty_folders.add(dp)
-                    # need to remove it because folder of empty folder
-                    # is an empty folder!!!
-                    if os.path.islink(dp):
-                        os.remove(dp)
-                    else:
-                        os.rmdir(dp)
-                    parent = os.path.dirname(dp)
-                    # edge case about parent containing one empty
-                    # folder which become an empty one
-                    while not os.listdir(parent):
-                        empty_folders.add(parent)
-                        if os.path.islink(parent):
-                            os.remove(parent)
-                        else:
-                            os.rmdir(parent)
-                        parent = os.path.dirname(parent)
-                    continue
-                yield (dp, dns - empty_folders, fns)
-    else:
-        def filtfn(dirnames):
-            return set(filter(dir_ok_fn, dirnames))
-
-        gen_dir = ((dp, dns, fns) for dp, dns, fns
-                   in basic_gen_dir(rootdir) if dir_ok_fn(dp))
-
-        if not remove_empty_folder:  # os.walk + filtering
-            for dp, dns, fns in gen_dir:
-                yield (dp, filtfn(dns), fns)
-        else:                        # os.walk + filtering + empty dir cleanup
-            empty_folders = set()
-            for dp, dns, fns in gen_dir:
-                dps = filtfn(dns)
-
-                if not dps and not fns:
-                    empty_folders.add(dp)
-                    # need to remove it because folder of empty folder
-                    # is an empty folder!!!
-                    if os.path.islink(dp):
-                        os.remove(dp)
-                    else:
-                        os.rmdir(dp)
-                    parent = os.path.dirname(dp)
-                    # edge case about parent containing one empty
-                    # folder which become an empty one
-                    while not os.listdir(parent):
-                        empty_folders.add(parent)
-                        if os.path.islink(parent):
-                            os.remove(parent)
-                        else:
-                            os.rmdir(parent)
-                        parent = os.path.dirname(parent)
-                    continue
-                yield dp, dps - empty_folders, fns
-
-
-def walk_and_compute_sha1_from_directory(rootdir,
-                                         dir_ok_fn=default_validation_dir,
-                                         with_root_tree=True,
-                                         remove_empty_folder=False):
-    """(Deprecated) TODO migrate the code to
-    compute_hashes_from_directory.
-
-    Compute git sha1 from directory rootdir.
-
-    Args:
-        - rootdir: Root directory from which beginning the git hash computation
-
-        - dir_ok_fn: Filter function to filter directory according to rules
-        defined in the function. By default, all folders are ok.
-        Example override: dir_ok_fn = lambda dirpath: b'svn' not in dirpath
-
-        - with_root_tree: Determine if we compute the upper root tree's
-          checksums. As a default, we want it. One possible use case where this
-          is not useful is the update (cf. `update_checksums_from`)
-
-    Returns:
-        Dictionary of entries with keys <path-name> and as values a list of
-        directory entries.
-        Those are list of dictionary with keys:
-          - 'perms'
-          - 'type'
-          - 'name'
-          - 'sha1_git'
-          - and specifically content: 'sha1', 'sha256', ...
-
-    Note:
-        One special key is ROOT_TREE_KEY to indicate the upper root of the
-        directory (this is the revision's directory).
-
-    Raises:
-        Nothing
-        If something is raised, this is a programmatic error.
-
-    """
-    ls_hashes = {}
-    all_links = set()
-
-    if rootdir.endswith(b'/'):
-        rootdir = rootdir.rstrip(b'/')
-
-    for dirpath, dirnames, filenames in _walk(
-            rootdir, dir_ok_fn, remove_empty_folder):
-        hashes = []
-
-        links = (file
-                 for file in filenames.union(dirnames)
-                 if os.path.islink(file))
-
-        for linkpath in links:
-            all_links.add(linkpath)
-            m_hashes = compute_link_metadata(linkpath)
-            hashes.append(m_hashes)
-
-        for filepath in (file for file in filenames if file not in all_links):
-            m_hashes = compute_blob_metadata(filepath)
-            hashes.append(m_hashes)
-
-        ls_hashes[dirpath] = hashes
-
-        dir_hashes = []
-        for fulldirname in (dir for dir in dirnames if dir not in all_links):
-            tree_hash = _compute_tree_metadata(fulldirname,
-                                               ls_hashes[fulldirname])
-            dir_hashes.append(tree_hash)
-
-        ls_hashes[dirpath].extend(dir_hashes)
-
-    if with_root_tree:
-        # compute the current directory hashes
-        root_hash = {
-            'sha1_git': _compute_directory_git_sha1(ls_hashes[rootdir]),
-            'path': rootdir,
-            'name': os.path.basename(rootdir),
-            'perms': GitPerm.TREE,
-            'type': GitType.TREE
-        }
-        ls_hashes[ROOT_TREE_KEY] = [root_hash]
-
-    return ls_hashes
-
-
-def compute_hashes_from_directory(rootdir,
-                                  dir_ok_fn=default_validation_dir,
-                                  remove_empty_folder=False):
-    """Compute git sha1 from directory rootdir.
-
-    Args:
-        - rootdir: Root directory from which beginning the git hash
-          computation
-
-        - dir_ok_fn: Filter function to filter directory according to rules
-        defined in the function. By default, all folders are ok.
-        Example override: dir_ok_fn = lambda dirpath: b'svn' not in dirpath
-
-    Returns:
-        Dictionary of entries with keys absolute path name.
-        Path-name can be a file/link or directory.
-        The associated value is a dictionary with:
-        - checksums: the dictionary with the hashes for the link/file/dir
-        Those are list of dictionary with keys:
-          - 'perms'
-          - 'type'
-          - 'name'
-          - 'sha1_git'
-          - and specifically content: 'sha1', 'sha256', ...
-
-        - children: Only for a directory, the set of children paths
-
-    Note:
-        One special key is the / which indicates the upper root of
-        the directory (this is the revision's directory).
-
-    Raises:
-        Nothing
-        If something is raised, this is a programmatic error.
-
-    """
-    def _get_dict_from_dirpath(_dict, path):
-        """Retrieve the default associated value for key path.
-
-        """
-        return _dict.get(path, dict(children=set(), checksums=None))
-
-    def _get_dict_from_filepath(_dict, path):
-        """Retrieve the default associated value for key path.
-
-        """
-        return _dict.get(path, dict(checksums=None))
-
-    ls_hashes = {}
-    all_links = set()
-
-    if rootdir.endswith(b'/'):
-        rootdir = rootdir.rstrip(b'/')
-
-    for dirpath, dirnames, filenames in _walk(
-            rootdir, dir_ok_fn, remove_empty_folder):
-
-        dir_entry = _get_dict_from_dirpath(ls_hashes, dirpath)
-        children = dir_entry['children']
-
-        links = (file
-                 for file in filenames.union(dirnames)
-                 if os.path.islink(file))
-
-        for linkpath in links:
-            all_links.add(linkpath)
-            m_hashes = compute_link_metadata(linkpath)
-            d = _get_dict_from_filepath(ls_hashes, linkpath)
-            d['checksums'] = m_hashes
-            ls_hashes[linkpath] = d
-            children.add(linkpath)
-
-        for filepath in (file for file in filenames if file not in all_links):
-            m_hashes = compute_blob_metadata(filepath)
-            d = _get_dict_from_filepath(ls_hashes, filepath)
-            d['checksums'] = m_hashes
-            ls_hashes[filepath] = d
-            children.add(filepath)
-
-        for fulldirname in (dir for dir in dirnames if dir not in all_links):
-            d_hashes = _get_dict_from_dirpath(ls_hashes, fulldirname)
-            tree_hash = _compute_tree_metadata(
-                fulldirname,
-                (ls_hashes[p]['checksums'] for p in d_hashes['children'])
-            )
-            d = _get_dict_from_dirpath(ls_hashes, fulldirname)
-            d['checksums'] = tree_hash
-            ls_hashes[fulldirname] = d
-            children.add(fulldirname)
-
-        dir_entry['children'] = children
-        ls_hashes[dirpath] = dir_entry
-
-    # compute the current directory hashes
-    d_hashes = _get_dict_from_dirpath(ls_hashes, rootdir)
-    root_hash = {
-        'sha1_git': _compute_directory_git_sha1(
-            (ls_hashes[p]['checksums'] for p in d_hashes['children'])
-        ),
-        'path': rootdir,
-        'name': os.path.basename(rootdir),
-        'perms': GitPerm.TREE,
-        'type': GitType.TREE
-    }
-    d_hashes['checksums'] = root_hash
-    ls_hashes[rootdir] = d_hashes
-
-    return ls_hashes
-
-
-def children_hashes(children, objects):
-    """Given a collection of children path, yield the corresponding
-    hashes.
-
-    Args:
-        objects: objects hash as returned by git.compute_hashes_from_directory.
-        children: collection of bytes path
-
-    Yields:
-        Dictionary hashes
-
-    """
-    for p in children:
-        c = objects.get(p)
-        if c:
-            h = c.get('checksums')
-            if h:
-                yield h
-
-
-def objects_per_type(filter_type, objects_per_path):
-    """Given an object dictionary returned by
-    `swh.model.git.compute_hashes_from_directory`, yields
-    corresponding element type's hashes
-
-    Args:
-        filter_type: one of GitType enum
-        objects_per_path:
-
-    Yields:
-        Elements of type filter_type's hashes
-
-    """
-    for path, obj in objects_per_path.items():
-        o = obj['checksums']
-        if o['type'] == filter_type:
-            if 'children' in obj:  # for trees
-                if obj['children']:
-                    o['children'] = children_hashes(obj['children'],
-                                                    objects_per_path)
-                else:
-                    o['children'] = []
-            yield o
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index f9aca1bc3f45c56874f51e33023402afa92f2952..4d6f9dba2088d6a0ceb8e489fe1587ba53c69d0c 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -28,28 +28,30 @@ import binascii
 import functools
 import hashlib
 import os
-import sys
 
 from io import BytesIO
 
-# Supported algorithms
 ALGORITHMS = set(['sha1', 'sha256', 'sha1_git', 'blake2s256', 'blake2b512'])
+"""Hashing algorithms supported by this module"""
 
-# Default algorithms used
 DEFAULT_ALGORITHMS = set(['sha1', 'sha256', 'sha1_git', 'blake2s256'])
+"""Algorithms computed by default when calling the functions from this module.
+
+Subset of :const:`ALGORITHMS`.
+"""
 
-# should be a multiple of 64 (sha1/sha256's block size)
-# FWIW coreutils' sha1sum uses 32768
 HASH_BLOCK_SIZE = 32768
+"""Block size for streaming hash computations made in this module"""
 
-# Prior to python3.4, only blake2 is available through pyblake2 module
-# From 3.5 onwards, it's been integrated in python
-if sys.version_info.major == 3 and sys.version_info.minor <= 4:
-    import pyblake2
-    # register those hash algorithms in hashlib
-    __cache = hashlib.__builtin_constructor_cache
-    __cache['blake2s256'] = pyblake2.blake2s
-    __cache['blake2b512'] = pyblake2.blake2b
+# Load blake2 hashes from pyblake2 if they are not available in the builtin
+# hashlib
+__pyblake2_hashes = {'blake2s256': 'blake2s',
+                     'blake2b512': 'blake2b'}
+__cache = hashlib.__builtin_constructor_cache
+for __hash, __pyblake2_fn in __pyblake2_hashes.items():
+    if __hash not in hashlib.algorithms_available:
+        import pyblake2
+        __cache[__hash] = getattr(pyblake2, __pyblake2_fn)
 
 
 def _new_git_hash(base_algo, git_type, length):
@@ -64,7 +66,7 @@ def _new_git_hash(base_algo, git_type, length):
      - One NUL byte
 
     Args:
-        base_algo: a hashlib-supported algorithm
+        base_algo (str from :const:`ALGORITHMS`): a hashlib-supported algorithm
         git_type: the type of the git object (supposedly one of 'blob',
                   'commit', 'tag', 'tree')
         length: the length of the git object you're encoding
@@ -90,7 +92,7 @@ def _new_hash(algo, length=None):
     Args:
         algo (str): a hashing algorithm (one of ALGORITHMS)
         length (int): the length of the hashed payload (needed for
-                git-specific algorithms)
+          git-specific algorithms)
 
     Returns:
         a hashutil.hash object
@@ -210,7 +212,15 @@ def hash_git_data(data, git_type, base_algo='sha1'):
 
 @functools.lru_cache()
 def hash_to_hex(hash):
-    """Converts a hash (in hex or bytes form) to its hexadecimal ascii form"""
+    """Converts a hash (in hex or bytes form) to its hexadecimal ascii form
+
+    Args:
+      hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing
+        the hexadecimal form of the hash
+
+    Returns:
+      str: the hexadecimal form of the hash
+    """
     if isinstance(hash, str):
         return hash
     return binascii.hexlify(hash).decode('ascii')
@@ -218,13 +228,28 @@ def hash_to_hex(hash):
 
 @functools.lru_cache()
 def hash_to_bytehex(hash):
-    """Converts a hash to its hexadecimal bytes representation"""
+    """Converts a hash to its hexadecimal bytes representation
+
+    Args:
+      hash (bytes): a :class:`bytes` hash
+
+    Returns:
+      bytes: the hexadecimal form of the hash, as :class:`bytes`
+    """
     return binascii.hexlify(hash)
 
 
 @functools.lru_cache()
 def hash_to_bytes(hash):
-    """Converts a hash (in hex or bytes form) to its raw bytes form"""
+    """Converts a hash (in hex or bytes form) to its raw bytes form
+
+    Args:
+      hash (str or bytes): a :class:`bytes` hash or a :class:`str` containing
+        the hexadecimal form of the hash
+
+    Returns:
+      bytes: the :class:`bytes` form of the hash
+    """
     if isinstance(hash, bytes):
         return hash
     return bytes.fromhex(hash)
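+
+# Round-trip sanity check (illustrative): hash_to_bytes and hash_to_hex are
+# inverses on well-formed input, e.g.
+#   hash_to_hex(hash_to_bytes('8a9c' * 10)) == '8a9c' * 10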
@@ -232,5 +257,13 @@ def hash_to_bytes(hash):
 
 @functools.lru_cache()
 def bytehex_to_hash(hex):
-    """Converts a hexadecimal bytes representation of a hash to that hash"""
+    """Converts a hexadecimal bytes representation of a hash to that hash
+
+    Args:
+      hex (bytes): a :class:`bytes` containing the hexadecimal form of the
+        hash, encoded in ascii
+
+    Returns:
+      bytes: the :class:`bytes` form of the hash
+    """
     return hash_to_bytes(hex.decode())
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index d51304e7e6d4bd917bb0fad3f5c099de32789318..c7a6ce96da6af54f1d19b357733a418730287cb9 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -16,12 +16,12 @@ def identifier_to_bytes(identifier):
 
     Args:
         identifier: an identifier, either a 40-char hexadecimal string or a
-                    bytes object of length 20
+            bytes object of length 20
     Returns:
         The length 20 bytestring corresponding to the given identifier
 
     Raises:
-        ValueError if the identifier is of an unexpected type or length.
+        ValueError: if the identifier is of an unexpected type or length.
     """
 
     if isinstance(identifier, bytes):
@@ -48,7 +48,8 @@ def identifier_to_str(identifier):
 
     Args:
         identifier: an identifier, either a 40-char hexadecimal string or a
-                    bytes object of length 20
+            bytes object of length 20
+
     Returns:
         The length 40 string corresponding to the given identifier, hex encoded
 
@@ -87,7 +88,7 @@ def content_identifier(content):
         A dictionary with all the hashes for the data
 
     Raises:
-        KeyError if the content doesn't have a data member.
+        KeyError: if the content doesn't have a data member.
 
     """
 
@@ -113,7 +114,9 @@ def escape_newlines(snippet):
     """Escape the newlines present in snippet according to git rules.
 
     New lines in git manifests are escaped by indenting the next line by one
-    space."""
+    space.
+
+    """
 
     if b'\n' in snippet:
         return b'\n '.join(snippet.split(b'\n'))
@@ -129,27 +132,30 @@ def directory_identifier(directory):
     trees:
 
     1. Entries of the directory are sorted using the name (or the name with '/'
-    appended for directory entries) as key, in bytes order.
+       appended for directory entries) as key, in bytes order.
 
     2. For each entry of the directory, the following bytes are output:
-        - the octal representation of the permissions for the entry
-          (stored in the 'perms' member), which is a representation of the
-          entry type:
-            b'100644' (int 33188) for files
-            b'100755' (int 33261) for executable files
-            b'120000' (int 40960) for symbolic links
-            b'40000' (int 16384) for directories
-            b'160000' (int 57344) for references to revisions
-        - an ascii space (b'\x20')
-        - the entry's name (as raw bytes), stored in the 'name' member
-        - a null byte (b'\x00')
-        - the 20 byte long identifier of the object pointed at by the entry,
-          stored in the 'target' member:
-            for files or executable files: their blob sha1_git
-            for symbolic links: the blob sha1_git of a file containing the
-                                link destination
-            for directories: their intrinsic identifier
-            for revisions: their intrinsic identifier
+
+      - the octal representation of the permissions for the entry (stored in
+        the 'perms' member), which is a representation of the entry type:
+
+        - b'100644' (int 33188) for files
+        - b'100755' (int 33261) for executable files
+        - b'120000' (int 40960) for symbolic links
+        - b'40000'  (int 16384) for directories
+        - b'160000' (int 57344) for references to revisions
+
+      - an ascii space (b'\x20')
+      - the entry's name (as raw bytes), stored in the 'name' member
+      - a null byte (b'\x00')
+      - the 20 byte long identifier of the object pointed at by the entry,
+        stored in the 'target' member:
+
+        - for files or executable files: their blob sha1_git
+        - for symbolic links: the blob sha1_git of a file containing the link
+          destination
+        - for directories: their intrinsic identifier
+        - for revisions: their intrinsic identifier
 
       (Note that there is no separator between entries)
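+
+    For example (illustrative, target abridged): an entry for a regular file
+    named ``hello`` contributes ``b'100644 hello\x00'`` followed by its
+    20-byte target to the manifest.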
 
@@ -200,8 +206,9 @@ def format_offset(offset, negative_utc=None):
     """Convert an integer number of minutes into an offset representation.
 
     The offset representation is [+-]hhmm where:
-        hh is the number of hours;
-        mm is the number of minutes.
+
+    - hh is the number of hours;
+    - mm is the number of minutes.
 
     A null offset is represented as +0000.
     """
@@ -221,21 +228,25 @@ def normalize_timestamp(time_representation):
     """Normalize a time representation for processing by Software Heritage
 
     This function supports a numeric timestamp (representing a number of
-    seconds since the UNIX epoch, 1970-01-01 at 00:00 UTC), a datetime.datetime
-    object (with timezone information), or a normalized Software
-    Heritage time representation (idempotency).
+    seconds since the UNIX epoch, 1970-01-01 at 00:00 UTC), a
+    :obj:`datetime.datetime` object (with timezone information), or a
+    normalized Software Heritage time representation (idempotency).
 
     Args:
         time_representation: the representation of a timestamp
 
-    Returns: a normalized dictionary with three keys
+    Returns:
+        dict: a normalized dictionary with three keys:
+
+            - timestamp: a dict with two optional keys:
+
+               - seconds: the integral number of seconds since the UNIX epoch
+               - microseconds: the integral number of microseconds
 
-     - timestamp: a dict with two optional keys:
-        - seconds: the integral number of seconds since the UNIX epoch
-        - microseconds: the integral number of microseconds
-     - offset: the timezone offset as a number of minutes relative to UTC
-     - negative_utc: a boolean representing whether the offset is -0000 when
-       offset = 0.
+            - offset: the timezone offset as a number of minutes relative to
+              UTC
+            - negative_utc: a boolean representing whether the offset is -0000
+              when offset = 0.
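+
+        For instance (an illustrative sketch; optional sub-second keys
+        omitted), a bare integer timestamp such as ``1507389428`` would
+        normalize to::
+
+            {'timestamp': {'seconds': 1507389428},
+             'offset': 0, 'negative_utc': False}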
 
     """
 
@@ -321,11 +332,12 @@ def format_author_line(header, author, date_offset):
     """Format a an author line according to git standards.
 
     An author line has three components:
-     - a header, describing the type of author (author, committer, tagger)
-     - a name and email, which is an arbitrary bytestring
-     - optionally, a timestamp with UTC offset specification
 
-    The author line is formatted thus:
+    - a header, describing the type of author (author, committer, tagger)
+    - a name and email, which is an arbitrary bytestring
+    - optionally, a timestamp with UTC offset specification
+
+    The author line is formatted thus::
 
         `header` `name and email`[ `timestamp` `utc_offset`]
 
@@ -343,11 +355,11 @@ def format_author_line(header, author, date_offset):
 
     Args:
         header: the header of the author line (one of 'author', 'committer',
-                'tagger')
+            'tagger')
         author: an author specification (dict with two bytes values: name and
-                email, or byte value)
+            email, or byte value)
         date_offset: a normalized date/time representation as returned by
-                     `normalize_timestamp`.
+            :func:`normalize_timestamp`.
 
     Returns:
         the newline-terminated byte string containing the author line
@@ -373,37 +385,36 @@ def revision_identifier(revision):
     """Return the intrinsic identifier for a revision.
 
     The fields used for the revision identifier computation are:
-     - directory
-     - parents
-     - author
-     - author_date
-     - committer
-     - committer_date
-     - metadata -> extra_headers
-     - message
+
+    - directory
+    - parents
+    - author
+    - author_date
+    - committer
+    - committer_date
+    - metadata -> extra_headers
+    - message
 
     A revision's identifier is the 'git'-checksum of a commit manifest
-    constructed as follows (newlines are a single ASCII newline character):
-
-    ```
-    tree <directory identifier>
-    [for each parent in parents]
-    parent <parent identifier>
-    [end for each parents]
-    author <author> <author_date>
-    committer <committer> <committer_date>
-    [for each key, value in extra_headers]
-    <key> <encoded value>
-    [end for each extra_headers]
-
-    <message>
-    ```
+    constructed as follows (newlines are a single ASCII newline character)::
+
+        tree <directory identifier>
+        [for each parent in parents]
+        parent <parent identifier>
+        [end for each parents]
+        author <author> <author_date>
+        committer <committer> <committer_date>
+        [for each key, value in extra_headers]
+        <key> <encoded value>
+        [end for each extra_headers]
+
+        <message>
 
     The directory identifier is the ascii representation of its hexadecimal
     encoding.
 
-    Author and committer are formatted with the `format_author` function.
-    Dates are formatted with the `format_date_offset` function.
+    Author and committer are formatted with the :func:`format_author` function.
+    Dates are formatted with the :func:`format_offset` function.
 
     Extra headers are an ordered list of [key, value] pairs. Keys are strings
     and get encoded to utf-8 for identifier computation. Values are either byte
diff --git a/swh/model/merkle.py b/swh/model/merkle.py
new file mode 100644
index 0000000000000000000000000000000000000000..c75cc2c2203f771e49e9c3627e0d7ecdd7960541
--- /dev/null
+++ b/swh/model/merkle.py
@@ -0,0 +1,286 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""Merkle tree data structure"""
+
+import abc
+import collections.abc
+
+
+def deep_update(left, right):
+    """Recursively update the left mapping with deeply nested values from the right
+    mapping.
+
+    This function is useful to merge the results of several calls to
+    :func:`MerkleNode.collect`.
+
+    Arguments:
+      left: a mapping (modified by the update operation)
+      right: a mapping
+
+    Returns:
+      the left mapping, updated with nested values from the right mapping
+
+    Example:
+        >>> a = {
+        ...     'key1': {
+        ...         'key2': {
+        ...              'key3': 'value1/2/3',
+        ...         },
+        ...     },
+        ... }
+        >>> deep_update(a, {
+        ...     'key1': {
+        ...         'key2': {
+        ...              'key4': 'value1/2/4',
+        ...         },
+        ...     },
+        ... }) == {
+        ...     'key1': {
+        ...         'key2': {
+        ...             'key3': 'value1/2/3',
+        ...             'key4': 'value1/2/4',
+        ...         },
+        ...     },
+        ... }
+        True
+        >>> deep_update(a, {
+        ...     'key1': {
+        ...         'key2': {
+        ...              'key3': 'newvalue1/2/3',
+        ...         },
+        ...     },
+        ... }) == {
+        ...     'key1': {
+        ...         'key2': {
+        ...             'key3': 'newvalue1/2/3',
+        ...             'key4': 'value1/2/4',
+        ...         },
+        ...     },
+        ... }
+        True
+
+    """
+    for key, rvalue in right.items():
+        if isinstance(rvalue, collections.abc.Mapping):
+            new_lvalue = deep_update(left.get(key, {}), rvalue)
+            left[key] = new_lvalue
+        else:
+            left[key] = rvalue
+    return left
+
+
+class MerkleNode(dict, metaclass=abc.ABCMeta):
+    """Representation of a node in a Merkle Tree.
+
+    A (generalized) `Merkle Tree`_ is a tree in which every node is labeled
+    with a hash of its own data and the hash of its children.
+
+    .. _Merkle Tree: https://en.wikipedia.org/wiki/Merkle_tree
+
+    In pseudocode::
+
+      node.hash = hash(node.data
+                       + sum(child.hash for child in node.children))
+
+    This class efficiently implements the Merkle Tree data structure on top of
+    a Python :class:`dict`, minimizing hash computations and new data
+    collections when updating nodes.
+
+    Node data is stored in the :attr:`data` attribute, while (named) children
+    are stored as items of the underlying dictionary.
+
+    Addition, update and removal of objects are instrumented to automatically
+    invalidate the hashes of the current node as well as its registered
+    parents. These operations also reset the collection status of the objects
+    so that the updated objects can be collected again.
+
+    The collection of updated data from the tree is implemented through the
+    :func:`collect` function and associated helpers.
+
+    Attributes:
+      data (dict): data associated to the current node
+      parents (list): known parents of the current node
+      collected (bool): whether the current node has been collected
+
+    """
+    __slots__ = ['parents', 'data', '__hash', 'collected']
+
+    type = None
+    """Type of the current node (used as a classifier for :func:`collect`)"""
+
+    def __init__(self, data=None):
+        super().__init__()
+        self.parents = []
+        self.data = data
+        self.__hash = None
+        self.collected = False
+
+    def invalidate_hash(self):
+        """Invalidate the cached hash of the current node."""
+        if not self.__hash:
+            return
+
+        self.__hash = None
+        self.collected = False
+        for parent in self.parents:
+            parent.invalidate_hash()
+
+    def update_hash(self, *, force=False):
+        """Recursively compute the hash of the current node.
+
+        Args:
+          force (bool): invalidate the cache and force the computation for
+            this node and all children.
+        """
+        if self.__hash and not force:
+            return self.__hash
+
+        if force:
+            self.invalidate_hash()
+
+        for child in self.values():
+            child.update_hash(force=force)
+
+        self.__hash = self.compute_hash()
+        return self.__hash
+
+    @property
+    def hash(self):
+        """The hash of the current node, as calculated by
+        :func:`compute_hash`.
+        """
+        return self.update_hash()
+
+    @abc.abstractmethod
+    def compute_hash(self):
+        """Compute the hash of the current node.
+
+        The hash should depend on the data of the node, as well as on hashes
+        of the children nodes.
+        """
+        raise NotImplementedError('Must implement compute_hash method')
+
+    def __setitem__(self, name, new_child):
+        """Add a child, invalidating the current hash"""
+        self.invalidate_hash()
+
+        super().__setitem__(name, new_child)
+
+        new_child.parents.append(self)
+
+    def __delitem__(self, name):
+        """Remove a child, invalidating the current hash"""
+        if name in self:
+            self.invalidate_hash()
+            self[name].parents.remove(self)
+            super().__delitem__(name)
+        else:
+            raise KeyError(name)
+
+    def update(self, new_children):
+        """Add several named children from a dictionary"""
+        if not new_children:
+            return
+
+        self.invalidate_hash()
+
+        for name, new_child in new_children.items():
+            new_child.parents.append(self)
+            if name in self:
+                self[name].parents.remove(self)
+
+        super().update(new_children)
+
+    def get_data(self, **kwargs):
+        """Retrieve and format the collected data for the current node, for use by
+        :func:`collect`.
+
+        Can be overridden, for instance when you want the collected data to
+        contain information about the child nodes.
+
+        Arguments:
+          kwargs: allow subclasses to alter behaviour depending on how
+            :func:`collect` is called.
+
+        Returns:
+          data formatted for :func:`collect`
+        """
+        return self.data
+
+    def collect_node(self, **kwargs):
+        """Collect the data for the current node, for use by :func:`collect`.
+
+        Arguments:
+          kwargs: passed as-is to :func:`get_data`.
+
+        Returns:
+          A :class:`dict` compatible with :func:`collect`.
+        """
+        if not self.collected:
+            self.collected = True
+            return {self.type: {self.hash: self.get_data(**kwargs)}}
+        else:
+            return {}
+
+    def collect(self, **kwargs):
+        """Collect the data for all nodes in the subtree rooted at `self`.
+
+        The data is deduplicated by type and by hash.
+
+        Args:
+          kwargs: passed as-is to :func:`get_data`.
+
+        Returns:
+           A :class:`dict` with the following structure::
+
+             {
+               'typeA': {
+                 node1.hash: node1.get_data(),
+                 node2.hash: node2.get_data(),
+               },
+               'typeB': {
+                 node3.hash: node3.get_data(),
+                 ...
+               },
+               ...
+             }
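+
+        For instance, a caller could iterate over the collected objects as
+        follows (a minimal sketch; the ``storage.add`` call is hypothetical)::
+
+            objects = root.collect()
+            for object_type, objects_by_hash in objects.items():
+                for hash_, data in objects_by_hash.items():
+                    storage.add(object_type, hash_, data)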
+        """
+        ret = self.collect_node(**kwargs)
+        for child in self.values():
+            deep_update(ret, child.collect(**kwargs))
+
+        return ret
+
+    def reset_collect(self):
+        """Recursively unmark collected nodes in the subtree rooted at `self`.
+
+        This lets the caller use :func:`collect` again.
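+
+        For example (an illustrative sketch)::
+
+            first = root.collect()
+            root.reset_collect()
+            again = root.collect()  # same objects as ``first``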
+        """
+        self.collected = False
+
+        for child in self.values():
+            child.reset_collect()
+
+
+class MerkleLeaf(MerkleNode):
+    """A leaf to a Merkle tree.
+
+    A Merkle leaf is simply a Merkle node with children disabled.
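+
+    An illustrative sketch only (assuming ``data`` holds raw bytes, which
+    this base class does not mandate)::
+
+        import hashlib
+
+        class BlobLeaf(MerkleLeaf):
+            type = 'blob'
+
+            def compute_hash(self):
+                return hashlib.sha1(self.data or b'').digest()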
+    """
+    __slots__ = []
+
+    def __setitem__(self, name, child):
+        raise ValueError('%s is a leaf' % self.__class__.__name__)
+
+    def __getitem__(self, name):
+        raise ValueError('%s is a leaf' % self.__class__.__name__)
+
+    def __delitem__(self, name):
+        raise ValueError('%s is a leaf' % self.__class__.__name__)
+
+    def update(self, new_children):
+        """Children update operation. Disabled for leaves."""
+        raise ValueError('%s is a leaf' % self.__class__.__name__)
diff --git a/swh/model/tests/generate_testdata_from_disk.py b/swh/model/tests/generate_testdata_from_disk.py
new file mode 100644
index 0000000000000000000000000000000000000000..35d4f480aa3b0657f9c1abdbd15af91ce82b5ad9
--- /dev/null
+++ b/swh/model/tests/generate_testdata_from_disk.py
@@ -0,0 +1,92 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from operator import itemgetter
+import os
+import sys
+
+from swh.model.from_disk import Directory, DentryPerms
+from swh.model.hashutil import ALGORITHMS, hash_to_hex
+
+
+def generate_from_directory(varname, directory, indent=0):
+    """Generate test data from a given directory"""
+    def get_data(member, path):
+        yield (path, member.get_data())
+        if isinstance(member, Directory):
+            for name, child in member.items():
+                yield from get_data(child, os.path.join(path, name))
+
+    data = dict(get_data(directory, b''))
+    out = []
+
+    def format_hash(h, indent=0):
+        spindent = ' ' * indent
+        if len(h) > 20:
+            cutoff = len(h)//2
+            parts = h[:cutoff], h[cutoff:]
+        else:
+            parts = [h]
+
+        out.append('hash_to_bytes(\n')
+        for part in parts:
+            out.append(spindent + '    %s\n' % repr(hash_to_hex(part)))
+        out.append(spindent + ')')
+
+    def format_dict_items(d, indent=0):
+        spindent = ' ' * indent
+        for key, value in sorted(d.items()):
+            if isinstance(key, bytes):
+                out.append(spindent + repr(key) + ': {\n')
+                format_dict_items(value, indent=indent + 4)
+                out.append(spindent + '}')
+            else:
+                out.append(spindent + repr(key) + ': ')
+                if key == 'entries':
+                    if not value:
+                        out.append('[]')
+                    else:
+                        out.append('[')
+                        last_index = len(value) - 1
+                        for i, entry in enumerate(
+                                sorted(value, key=itemgetter('name'))):
+                            if i:
+                                out.append(' ')
+                            out.append('{\n')
+                            format_dict_items(entry, indent=indent + 4)
+                            if i != last_index:
+                                out.append(spindent + '},')
+                        out.append(spindent + '}]')
+                elif key in ALGORITHMS | {'id', 'target'}:
+                    format_hash(value, indent=indent)
+                elif isinstance(value, DentryPerms):
+                    out.append(str(value))
+                else:
+                    out.append(repr(value))
+            out.append(',\n')
+
+    spindent = ' ' * indent
+    out.append(spindent + '%s = {\n' % varname)
+    format_dict_items(data, indent=4 + indent)
+    out.append(spindent + '}')
+
+    return ''.join(out)
+
+
+if __name__ == '__main__':
+    if not sys.argv[1:]:
+        print("Usage: %s dir [dir...]" % sys.argv[0], file=sys.stderr)
+        sys.exit(2)
+
+    for dirname in sys.argv[1:]:
+        basename = os.path.basename(dirname)
+        varname = 'expected_%s' % basename
+        testdata = generate_from_directory(
+            varname,
+            Directory.from_disk(path=os.fsencode(dirname)),
+            indent=8
+        )
+        print(testdata)
+        print()
diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e568ec25d21985c00e8e5243992ea0bb753aa19
--- /dev/null
+++ b/swh/model/tests/test_from_disk.py
@@ -0,0 +1,789 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import tarfile
+import tempfile
+import unittest
+
+from nose.plugins.attrib import attr
+
+from swh.model import from_disk
+from swh.model.from_disk import Content, Directory, DentryPerms
+from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
+
+
+class ModeToPerms(unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+
+        # Generate a full permissions map
+        self.perms_map = {}
+
+        # Symlinks
+        for i in range(0o120000, 0o127777 + 1):
+            self.perms_map[i] = DentryPerms.symlink
+
+        # Directories
+        for i in range(0o040000, 0o047777 + 1):
+            self.perms_map[i] = DentryPerms.directory
+
+        # Other file types (socket, regular file, block device, character
+        # device, fifo) all map to regular files
+        for ft in [0o140000, 0o100000, 0o060000, 0o020000, 0o010000]:
+            for i in range(ft, ft + 0o7777 + 1):
+                if i & 0o111:
+                    # executable bits are set
+                    self.perms_map[i] = DentryPerms.executable_content
+                else:
+                    self.perms_map[i] = DentryPerms.content
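+
+        # For instance, mode 0o100755 (an executable regular file) maps to
+        # DentryPerms.executable_content, while 0o100644 maps to
+        # DentryPerms.content.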
+
+    def test_exhaustive_mode_to_perms(self):
+        for fmode, perm in self.perms_map.items():
+            self.assertEqual(perm, from_disk.mode_to_perms(fmode))
+
+
+class DataMixin:
+    maxDiff = None
+
+    def setUp(self):
+        self.tmpdir = tempfile.TemporaryDirectory(
+            prefix='swh.model.from_disk'
+        )
+        self.tmpdir_name = os.fsencode(self.tmpdir.name)
+
+        self.contents = {
+            b'file': {
+                'data': b'42\n',
+                'sha1': hash_to_bytes(
+                    '34973274ccef6ab4dfaaf86599792fa9c3fe4689'
+                ),
+                'sha256': hash_to_bytes(
+                    '084c799cd551dd1d8d5c5f9a5d593b2e'
+                    '931f5e36122ee5c793c1d08a19839cc0'
+                ),
+                'sha1_git': hash_to_bytes(
+                    'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'),
+                'blake2s256': hash_to_bytes(
+                    'd5fe1939576527e42cfd76a9455a2432'
+                    'fe7f56669564577dd93c4280e76d661d'
+                ),
+                'length': 3,
+                'mode': 0o100644
+            },
+        }
+
+        self.symlinks = {
+            b'symlink': {
+                'data': b'target',
+                'blake2s256': hash_to_bytes(
+                    '595d221b30fdd8e10e2fdf18376e688e'
+                    '9f18d56fd9b6d1eb6a822f8c146c6da6'
+                ),
+                'sha1': hash_to_bytes(
+                    '0e8a3ad980ec179856012b7eecf4327e99cd44cd'
+                ),
+                'sha1_git': hash_to_bytes(
+                    '1de565933b05f74c75ff9a6520af5f9f8a5a2f1d'
+                ),
+                'sha256': hash_to_bytes(
+                    '34a04005bcaf206eec990bd9637d9fdb'
+                    '6725e0a0c0d4aebf003f17f4c956eb5c'
+                ),
+                'length': 6,
+                'perms': DentryPerms.symlink,
+            }
+        }
+
+        self.specials = {
+            b'fifo': os.mkfifo,
+            b'devnull': lambda path: os.mknod(path, device=os.makedev(1, 3)),
+        }
+
+        self.empty_content = {
+            'data': b'',
+            'length': 0,
+            'blake2s256': hash_to_bytes(
+                '69217a3079908094e11121d042354a7c'
+                '1f55b6482ca1a51e1b250dfd1ed0eef9'
+            ),
+            'sha1': hash_to_bytes(
+                'da39a3ee5e6b4b0d3255bfef95601890afd80709'
+            ),
+            'sha1_git': hash_to_bytes(
+                'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
+            ),
+            'sha256': hash_to_bytes(
+                'e3b0c44298fc1c149afbf4c8996fb924'
+                '27ae41e4649b934ca495991b7852b855'
+            ),
+            'perms': DentryPerms.content,
+        }
+
+        self.empty_directory = {
+            'id': hash_to_bytes(
+                '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
+            ),
+            'entries': [],
+        }
+
+        # Generated with generate_testdata_from_disk
+        self.tarball_contents = {
+            b'': {
+                'entries': [{
+                    'name': b'bar',
+                    'perms': DentryPerms.directory,
+                    'target': hash_to_bytes(
+                        '3c1f578394f4623f74a0ba7fe761729f59fc6ec4'
+                    ),
+                    'type': 'dir',
+                }, {
+                    'name': b'empty-folder',
+                    'perms': DentryPerms.directory,
+                    'target': hash_to_bytes(
+                        '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
+                    ),
+                    'type': 'dir',
+                }, {
+                    'name': b'foo',
+                    'perms': DentryPerms.directory,
+                    'target': hash_to_bytes(
+                        '2b41c40f0d1fbffcba12497db71fba83fcca96e5'
+                    ),
+                    'type': 'dir',
+                }, {
+                    'name': b'link-to-another-quote',
+                    'perms': DentryPerms.symlink,
+                    'target': hash_to_bytes(
+                        '7d5c08111e21c8a9f71540939998551683375fad'
+                    ),
+                    'type': 'file',
+                }, {
+                    'name': b'link-to-binary',
+                    'perms': DentryPerms.symlink,
+                    'target': hash_to_bytes(
+                        'e86b45e538d9b6888c969c89fbd22a85aa0e0366'
+                    ),
+                    'type': 'file',
+                }, {
+                    'name': b'link-to-foo',
+                    'perms': DentryPerms.symlink,
+                    'target': hash_to_bytes(
+                        '19102815663d23f8b75a47e7a01965dcdc96468c'
+                    ),
+                    'type': 'file',
+                }, {
+                    'name': b'some-binary',
+                    'perms': DentryPerms.executable_content,
+                    'target': hash_to_bytes(
+                        '68769579c3eaadbe555379b9c3538e6628bae1eb'
+                    ),
+                    'type': 'file',
+                }],
+                'id': hash_to_bytes(
+                    'e8b0f1466af8608c8a3fb9879db172b887e80759'
+                ),
+            },
+            b'bar': {
+                'entries': [{
+                    'name': b'barfoo',
+                    'perms': DentryPerms.directory,
+                    'target': hash_to_bytes(
+                        'c3020f6bf135a38c6df3afeb5fb38232c5e07087'
+                    ),
+                    'type': 'dir',
+                }],
+                'id': hash_to_bytes(
+                    '3c1f578394f4623f74a0ba7fe761729f59fc6ec4'
+                ),
+            },
+            b'bar/barfoo': {
+                'entries': [{
+                    'name': b'another-quote.org',
+                    'perms': DentryPerms.content,
+                    'target': hash_to_bytes(
+                        '133693b125bad2b4ac318535b84901ebb1f6b638'
+                    ),
+                    'type': 'file',
+                }],
+                'id': hash_to_bytes(
+                    'c3020f6bf135a38c6df3afeb5fb38232c5e07087'
+                ),
+            },
+            b'bar/barfoo/another-quote.org': {
+                'blake2s256': hash_to_bytes(
+                    'd26c1cad82d43df0bffa5e7be11a60e3'
+                    '4adb85a218b433cbce5278b10b954fe8'
+                ),
+                'length': 72,
+                'perms': DentryPerms.content,
+                'sha1': hash_to_bytes(
+                    '90a6138ba59915261e179948386aa1cc2aa9220a'
+                ),
+                'sha1_git': hash_to_bytes(
+                    '133693b125bad2b4ac318535b84901ebb1f6b638'
+                ),
+                'sha256': hash_to_bytes(
+                    '3db5ae168055bcd93a4d08285dc99ffe'
+                    'e2883303b23fac5eab850273a8ea5546'
+                ),
+            },
+            b'empty-folder': {
+                'entries': [],
+                'id': hash_to_bytes(
+                    '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
+                ),
+            },
+            b'foo': {
+                'entries': [{
+                    'name': b'barfoo',
+                    'perms': DentryPerms.symlink,
+                    'target': hash_to_bytes(
+                        '8185dfb2c0c2c597d16f75a8a0c37668567c3d7e'
+                    ),
+                    'type': 'file',
+                }, {
+                    'name': b'quotes.md',
+                    'perms': DentryPerms.content,
+                    'target': hash_to_bytes(
+                        '7c4c57ba9ff496ad179b8f65b1d286edbda34c9a'
+                    ),
+                    'type': 'file',
+                }, {
+                    'name': b'rel-link-to-barfoo',
+                    'perms': DentryPerms.symlink,
+                    'target': hash_to_bytes(
+                        'acac326ddd63b0bc70840659d4ac43619484e69f'
+                    ),
+                    'type': 'file',
+                }],
+                'id': hash_to_bytes(
+                    '2b41c40f0d1fbffcba12497db71fba83fcca96e5'
+                ),
+            },
+            b'foo/barfoo': {
+                'blake2s256': hash_to_bytes(
+                    'e1252f2caa4a72653c4efd9af871b62b'
+                    'f2abb7bb2f1b0e95969204bd8a70d4cd'
+                ),
+                'data': b'bar/barfoo',
+                'length': 10,
+                'perms': DentryPerms.symlink,
+                'sha1': hash_to_bytes(
+                    '9057ee6d0162506e01c4d9d5459a7add1fedac37'
+                ),
+                'sha1_git': hash_to_bytes(
+                    '8185dfb2c0c2c597d16f75a8a0c37668567c3d7e'
+                ),
+                'sha256': hash_to_bytes(
+                    '29ad3f5725321b940332c78e403601af'
+                    'ff61daea85e9c80b4a7063b6887ead68'
+                ),
+            },
+            b'foo/quotes.md': {
+                'blake2s256': hash_to_bytes(
+                    'bf7ce4fe304378651ee6348d3e9336ed'
+                    '5ad603d33e83c83ba4e14b46f9b8a80b'
+                ),
+                'length': 66,
+                'perms': DentryPerms.content,
+                'sha1': hash_to_bytes(
+                    '1bf0bb721ac92c18a19b13c0eb3d741cbfadebfc'
+                ),
+                'sha1_git': hash_to_bytes(
+                    '7c4c57ba9ff496ad179b8f65b1d286edbda34c9a'
+                ),
+                'sha256': hash_to_bytes(
+                    'caca942aeda7b308859eb56f909ec96d'
+                    '07a499491690c453f73b9800a93b1659'
+                ),
+            },
+            b'foo/rel-link-to-barfoo': {
+                'blake2s256': hash_to_bytes(
+                    'd9c327421588a1cf61f316615005a2e9'
+                    'c13ac3a4e96d43a24138d718fa0e30db'
+                ),
+                'data': b'../bar/barfoo',
+                'length': 13,
+                'perms': DentryPerms.symlink,
+                'sha1': hash_to_bytes(
+                    'dc51221d308f3aeb2754db48391b85687c2869f4'
+                ),
+                'sha1_git': hash_to_bytes(
+                    'acac326ddd63b0bc70840659d4ac43619484e69f'
+                ),
+                'sha256': hash_to_bytes(
+                    '8007d20db2af40435f42ddef4b8ad76b'
+                    '80adbec26b249fdf0473353f8d99df08'
+                ),
+            },
+            b'link-to-another-quote': {
+                'blake2s256': hash_to_bytes(
+                    '2d0e73cea01ba949c1022dc10c8a43e6'
+                    '6180639662e5dc2737b843382f7b1910'
+                ),
+                'data': b'bar/barfoo/another-quote.org',
+                'length': 28,
+                'perms': DentryPerms.symlink,
+                'sha1': hash_to_bytes(
+                    'cbeed15e79599c90de7383f420fed7acb48ea171'
+                ),
+                'sha1_git': hash_to_bytes(
+                    '7d5c08111e21c8a9f71540939998551683375fad'
+                ),
+                'sha256': hash_to_bytes(
+                    'e6e17d0793aa750a0440eb9ad5b80b25'
+                    '8076637ef0fb68f3ac2e59e4b9ac3ba6'
+                ),
+            },
+            b'link-to-binary': {
+                'blake2s256': hash_to_bytes(
+                    '9ce18b1adecb33f891ca36664da676e1'
+                    '2c772cc193778aac9a137b8dc5834b9b'
+                ),
+                'data': b'some-binary',
+                'length': 11,
+                'perms': DentryPerms.symlink,
+                'sha1': hash_to_bytes(
+                    'd0248714948b3a48a25438232a6f99f0318f59f1'
+                ),
+                'sha1_git': hash_to_bytes(
+                    'e86b45e538d9b6888c969c89fbd22a85aa0e0366'
+                ),
+                'sha256': hash_to_bytes(
+                    '14126e97d83f7d261c5a6889cee73619'
+                    '770ff09e40c5498685aba745be882eff'
+                ),
+            },
+            b'link-to-foo': {
+                'blake2s256': hash_to_bytes(
+                    '08d6cad88075de8f192db097573d0e82'
+                    '9411cd91eb6ec65e8fc16c017edfdb74'
+                ),
+                'data': b'foo',
+                'length': 3,
+                'perms': DentryPerms.symlink,
+                'sha1': hash_to_bytes(
+                    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
+                ),
+                'sha1_git': hash_to_bytes(
+                    '19102815663d23f8b75a47e7a01965dcdc96468c'
+                ),
+                'sha256': hash_to_bytes(
+                    '2c26b46b68ffc68ff99b453c1d304134'
+                    '13422d706483bfa0f98a5e886266e7ae'
+                ),
+            },
+            b'some-binary': {
+                'blake2s256': hash_to_bytes(
+                    '922e0f7015035212495b090c27577357'
+                    'a740ddd77b0b9e0cd23b5480c07a18c6'
+                ),
+                'length': 5,
+                'perms': DentryPerms.executable_content,
+                'sha1': hash_to_bytes(
+                    '0bbc12d7f4a2a15b143da84617d95cb223c9b23c'
+                ),
+                'sha1_git': hash_to_bytes(
+                    '68769579c3eaadbe555379b9c3538e6628bae1eb'
+                ),
+                'sha256': hash_to_bytes(
+                    'bac650d34a7638bb0aeb5342646d24e3'
+                    'b9ad6b44c9b383621faa482b990a367d'
+                ),
+            },
+        }
+
+    def tearDown(self):
+        self.tmpdir.cleanup()
+
+    def assertContentEqual(self, left, right, *, check_data=False,  # noqa
+                           check_path=False):
+        if not isinstance(left, Content):
+            raise ValueError('%s is not a Content' % left)
+        if isinstance(right, Content):
+            right = right.get_data()
+
+        keys = DEFAULT_ALGORITHMS | {
+            'length',
+            'perms',
+        }
+        if check_data:
+            keys |= {'data'}
+        if check_path:
+            keys |= {'path'}
+
+        failed = []
+        for key in keys:
+            try:
+                lvalue = left.data[key]
+                if key == 'perms' and 'perms' not in right:
+                    rvalue = from_disk.mode_to_perms(right['mode'])
+                else:
+                    rvalue = right[key]
+            except KeyError:
+                failed.append(key)
+                continue
+
+            if lvalue != rvalue:
+                failed.append(key)
+
+        if failed:
+            raise self.failureException(
+                'Content mismatched:\n' +
+                '\n'.join(
+                    'content[%s] = %r != %r' % (
+                        key, left.data.get(key), right.get(key))
+                    for key in failed
+                )
+            )
+
+    def assertDirectoryEqual(self, left, right):  # NoQA
+        if not isinstance(left, Directory):
+            raise ValueError('%s is not a Directory' % left)
+        if isinstance(right, Directory):
+            right = right.get_data()
+
+        return self.assertCountEqual(left.entries, right['entries'])
+
+    def make_contents(self, directory):
+        for filename, content in self.contents.items():
+            path = os.path.join(directory, filename)
+            with open(path, 'wb') as f:
+                f.write(content['data'])
+            os.chmod(path, content['mode'])
+
+    def make_symlinks(self, directory):
+        for filename, symlink in self.symlinks.items():
+            path = os.path.join(directory, filename)
+            os.symlink(symlink['data'], path)
+
+    def make_specials(self, directory):
+        for filename, fn in self.specials.items():
+            path = os.path.join(directory, filename)
+            fn(path)
+
+    def make_from_tarball(self, directory):
+        tarball = os.path.join(os.path.dirname(__file__),
+                               '../../../..',
+                               'swh-storage-testdata',
+                               'dir-folders',
+                               'sample-folder.tgz')
+
+        with tarfile.open(tarball, 'r:gz') as f:
+            f.extractall(os.fsdecode(directory))
+
+
+class TestContent(DataMixin, unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+
+    def test_data_to_content(self):
+        for filename, content in self.contents.items():
+            conv_content = Content.from_bytes(mode=content['mode'],
+                                              data=content['data'])
+            self.assertContentEqual(conv_content, content)
+            self.assertIn(hash_to_hex(conv_content.hash), repr(conv_content))
+
+
+class SymlinkToContent(DataMixin, unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+        self.make_symlinks(self.tmpdir_name)
+
+    def test_symlink_to_content(self):
+        for filename, symlink in self.symlinks.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            perms = 0o120000
+            conv_content = Content.from_symlink(path=path, mode=perms)
+            self.assertContentEqual(conv_content, symlink)
+
+
+class FileToContent(DataMixin, unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+        self.make_contents(self.tmpdir_name)
+        self.make_symlinks(self.tmpdir_name)
+        self.make_specials(self.tmpdir_name)
+
+    def test_file_to_content(self):
+        # Check whether loading the data works
+        for data in [True, False]:
+            for filename, symlink in self.symlinks.items():
+                path = os.path.join(self.tmpdir_name, filename)
+                conv_content = Content.from_file(path=path, data=data)
+                self.assertContentEqual(conv_content, symlink, check_data=data)
+
+            for filename, content in self.contents.items():
+                path = os.path.join(self.tmpdir_name, filename)
+                conv_content = Content.from_file(path=path, data=data)
+                self.assertContentEqual(conv_content, content, check_data=data)
+
+            for filename in self.specials:
+                path = os.path.join(self.tmpdir_name, filename)
+                conv_content = Content.from_file(path=path, data=data)
+                self.assertContentEqual(conv_content, self.empty_content)
+
+    def test_file_to_content_with_path(self):
+        for filename, content in self.contents.items():
+            content_w_path = content.copy()
+            path = os.path.join(self.tmpdir_name, filename)
+            content_w_path['path'] = path
+            conv_content = Content.from_file(path=path, save_path=True)
+            self.assertContentEqual(conv_content, content_w_path,
+                                    check_path=True)
+
+
+class DirectoryToObjects(DataMixin, unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+        contents = os.path.join(self.tmpdir_name, b'contents')
+        os.mkdir(contents)
+        self.make_contents(contents)
+        symlinks = os.path.join(self.tmpdir_name, b'symlinks')
+        os.mkdir(symlinks)
+        self.make_symlinks(symlinks)
+        specials = os.path.join(self.tmpdir_name, b'specials')
+        os.mkdir(specials)
+        self.make_specials(specials)
+        empties = os.path.join(self.tmpdir_name, b'empty1', b'empty2')
+        os.makedirs(empties)
+
+    def test_directory_to_objects(self):
+        directory = Directory.from_disk(path=self.tmpdir_name)
+
+        for name, value in self.contents.items():
+            self.assertContentEqual(directory[b'contents/' + name], value)
+
+        for name, value in self.symlinks.items():
+            self.assertContentEqual(directory[b'symlinks/' + name], value)
+
+        for name in self.specials:
+            self.assertContentEqual(
+                directory[b'specials/' + name],
+                self.empty_content,
+            )
+
+        self.assertEqual(
+            directory[b'empty1/empty2'].get_data(),
+            self.empty_directory,
+        )
+
+        # Raises KeyError on a nonexistent file
+        with self.assertRaisesRegex(KeyError, "b'nonexistent'"):
+            directory[b'empty1/nonexistent']
+
+        # Raises KeyError on a nonexistent directory
+        with self.assertRaisesRegex(KeyError, "b'nonexistentdir'"):
+            directory[b'nonexistentdir/file']
+
+        objs = directory.collect()
+
+        self.assertCountEqual(['content', 'directory'], objs)
+
+        self.assertEqual(len(objs['directory']), 6)
+        self.assertEqual(len(objs['content']),
+                         len(self.contents)
+                         + len(self.symlinks)
+                         + 1)
+
+    def test_directory_to_objects_ignore_empty(self):
+        directory = Directory.from_disk(
+            path=self.tmpdir_name,
+            dir_filter=from_disk.ignore_empty_directories
+        )
+
+        for name, value in self.contents.items():
+            self.assertContentEqual(directory[b'contents/' + name], value)
+
+        for name, value in self.symlinks.items():
+            self.assertContentEqual(directory[b'symlinks/' + name], value)
+
+        for name in self.specials:
+            self.assertContentEqual(
+                directory[b'specials/' + name],
+                self.empty_content,
+            )
+
+        # empty directories have been ignored recursively
+        with self.assertRaisesRegex(KeyError, "b'empty1'"):
+            directory[b'empty1']
+        with self.assertRaisesRegex(KeyError, "b'empty1'"):
+            directory[b'empty1/empty2']
+
+        objs = directory.collect()
+
+        self.assertCountEqual(['content', 'directory'], objs)
+
+        self.assertEqual(len(objs['directory']), 4)
+        self.assertEqual(len(objs['content']),
+                         len(self.contents)
+                         + len(self.symlinks)
+                         + 1)
+
+    def test_directory_to_objects_ignore_name(self):
+        directory = Directory.from_disk(
+            path=self.tmpdir_name,
+            dir_filter=from_disk.ignore_named_directories([b'symlinks'])
+        )
+        for name, value in self.contents.items():
+            self.assertContentEqual(directory[b'contents/' + name], value)
+
+        for name in self.specials:
+            self.assertContentEqual(
+                directory[b'specials/' + name],
+                self.empty_content,
+            )
+
+        self.assertEqual(
+            directory[b'empty1/empty2'].get_data(),
+            self.empty_directory,
+        )
+
+        with self.assertRaisesRegex(KeyError, "b'symlinks'"):
+            directory[b'symlinks']
+
+        objs = directory.collect()
+
+        self.assertCountEqual(['content', 'directory'], objs)
+
+        self.assertEqual(len(objs['directory']), 5)
+        self.assertEqual(len(objs['content']),
+                         len(self.contents)
+                         + 1)
+
+    def test_directory_to_objects_ignore_name_case(self):
+        directory = Directory.from_disk(
+            path=self.tmpdir_name,
+            dir_filter=from_disk.ignore_named_directories([b'symLiNks'],
+                                                          case_sensitive=False)
+        )
+        for name, value in self.contents.items():
+            self.assertContentEqual(directory[b'contents/' + name], value)
+
+        for name in self.specials:
+            self.assertContentEqual(
+                directory[b'specials/' + name],
+                self.empty_content,
+            )
+
+        self.assertEqual(
+            directory[b'empty1/empty2'].get_data(),
+            self.empty_directory,
+        )
+
+        with self.assertRaisesRegex(KeyError, "b'symlinks'"):
+            directory[b'symlinks']
+
+        objs = directory.collect()
+
+        self.assertCountEqual(['content', 'directory'], objs)
+
+        self.assertEqual(len(objs['directory']), 5)
+        self.assertEqual(len(objs['content']),
+                         len(self.contents)
+                         + 1)
+
+
+@attr('fs')
+class TarballTest(DataMixin, unittest.TestCase):
+    def setUp(self):
+        super().setUp()
+        self.make_from_tarball(self.tmpdir_name)
+
+    def test_contents_match(self):
+        directory = Directory.from_disk(
+            path=os.path.join(self.tmpdir_name, b'sample-folder')
+        )
+
+        for name, data in self.tarball_contents.items():
+            obj = directory[name]
+            if isinstance(obj, Content):
+                self.assertContentEqual(obj, data)
+            elif isinstance(obj, Directory):
+                self.assertDirectoryEqual(obj, data)
+            else:
+                raise self.failureException('Unknown type for %s' % obj)
+
+
+class DirectoryManipulation(DataMixin, unittest.TestCase):
+    def test_directory_access_nested(self):
+        d = Directory()
+        d[b'a'] = Directory()
+        d[b'a/b'] = Directory()
+
+        self.assertEqual(d[b'a/b'].get_data(), self.empty_directory)
+
+    def test_directory_del_nested(self):
+        d = Directory()
+        d[b'a'] = Directory()
+        d[b'a/b'] = Directory()
+
+        with self.assertRaisesRegex(KeyError, "b'c'"):
+            del d[b'a/b/c']
+
+        with self.assertRaisesRegex(KeyError, "b'level2'"):
+            del d[b'a/level2/c']
+
+        del d[b'a/b']
+
+        self.assertEqual(d[b'a'].get_data(), self.empty_directory)
+
+    def test_directory_access_self(self):
+        d = Directory()
+        self.assertIs(d, d[b''])
+        self.assertIs(d, d[b'/'])
+        self.assertIs(d, d[b'//'])
+
+    def test_directory_access_wrong_type(self):
+        d = Directory()
+        with self.assertRaisesRegex(ValueError, 'bytes from Directory'):
+            d['foo']
+        with self.assertRaisesRegex(ValueError, 'bytes from Directory'):
+            d[42]
+
+    def test_directory_repr(self):
+        entries = [b'a', b'b', b'c']
+        d = Directory()
+        for entry in entries:
+            d[entry] = Directory()
+
+        r = repr(d)
+        self.assertIn(hash_to_hex(d.hash), r)
+
+        for entry in entries:
+            self.assertIn(str(entry), r)
+
+    def test_directory_set_wrong_type_name(self):
+        d = Directory()
+        with self.assertRaisesRegex(ValueError, 'bytes Directory entry'):
+            d['foo'] = Directory()
+        with self.assertRaisesRegex(ValueError, 'bytes Directory entry'):
+            d[42] = Directory()
+
+    def test_directory_set_nul_in_name(self):
+        d = Directory()
+
+        with self.assertRaisesRegex(ValueError, 'nul bytes'):
+            d[b'\x00\x01'] = Directory()
+
+    def test_directory_set_empty_name(self):
+        d = Directory()
+        with self.assertRaisesRegex(ValueError, 'must have a name'):
+            d[b''] = Directory()
+        with self.assertRaisesRegex(ValueError, 'must have a name'):
+            d[b'/'] = Directory()
+
+    def test_directory_set_wrong_type(self):
+        d = Directory()
+        with self.assertRaisesRegex(ValueError, 'Content or Directory'):
+            d[b'entry'] = object()
+
+    def test_directory_del_wrong_type(self):
+        d = Directory()
+        with self.assertRaisesRegex(ValueError, 'bytes Directory entry'):
+            del d['foo']
+        with self.assertRaisesRegex(ValueError, 'bytes Directory entry'):
+            del d[42]
diff --git a/swh/model/tests/test_git.py b/swh/model/tests/test_git.py
deleted file mode 100644
index 0bf81bc8c0e2aa250c87dd53b3ccd14644eebcbf..0000000000000000000000000000000000000000
--- a/swh/model/tests/test_git.py
+++ /dev/null
@@ -1,734 +0,0 @@
-# Copyright (C) 2015-2017  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import os
-import shutil
-import subprocess
-import tempfile
-import unittest
-
-from nose.plugins.attrib import attr
-from nose.tools import istest
-
-from swh.model import git
-
-
-class GitHashlib(unittest.TestCase):
-    def setUp(self):
-        self.tree_data = b''.join([b'40000 barfoo\0',
-                                   bytes.fromhex('c3020f6bf135a38c6df'
-                                                 '3afeb5fb38232c5e07087'),
-                                   b'100644 blah\0',
-                                   bytes.fromhex('63756ef0df5e4f10b6efa'
-                                                 '33cfe5c758749615f20'),
-                                   b'100644 hello\0',
-                                   bytes.fromhex('907b308167f0880fb2a'
-                                                 '5c0e1614bb0c7620f9dc3')])
-
-        self.commit_data = """tree 1c61f7259dcb770f46b194d941df4f08ff0a3970
-author Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444054085 +0200
-committer Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444054085 +0200
-
-initial
-""".encode('utf-8')  # NOQA
-        self.tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241
-type commit
-tag 0.0.1
-tagger Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444225145 +0200
-
-blah
-""".encode('utf-8')  # NOQA
-
-        self.checksums = {
-            'tree_sha1_git': bytes.fromhex('ac212302c45eada382b27bfda795db'
-                                           '121dacdb1c'),
-            'commit_sha1_git': bytes.fromhex('e960570b2e6e2798fa4cfb9af2c399'
-                                             'd629189653'),
-            'tag_sha1_git': bytes.fromhex('bc2b99ba469987bcf1272c189ed534'
-                                          'e9e959f120'),
-        }
-
-    @istest
-    def compute_directory_git_sha1(self):
-        # given
-        dirpath = 'some-dir-path'
-        hashes = {
-            dirpath: [{'perms': git.GitPerm.TREE,
-                       'type': git.GitType.TREE,
-                       'name': b'barfoo',
-                       'sha1_git': bytes.fromhex('c3020f6bf135a38c6df'
-                                                 '3afeb5fb38232c5e07087')},
-                      {'perms': git.GitPerm.BLOB,
-                       'type': git.GitType.BLOB,
-                       'name': b'hello',
-                       'sha1_git': bytes.fromhex('907b308167f0880fb2a'
-                                                 '5c0e1614bb0c7620f9dc3')},
-                      {'perms': git.GitPerm.BLOB,
-                       'type': git.GitType.BLOB,
-                       'name': b'blah',
-                       'sha1_git': bytes.fromhex('63756ef0df5e4f10b6efa'
-                                                 '33cfe5c758749615f20')}]
-        }
-
-        # when
-        checksum = git.compute_directory_git_sha1(dirpath, hashes)
-
-        # then
-        self.assertEqual(checksum, self.checksums['tree_sha1_git'])
-
-    @istest
-    def compute_revision_sha1_git(self):
-        # given
-        tree_hash = bytes.fromhex('1c61f7259dcb770f46b194d941df4f08ff0a3970')
-        revision = {
-            'author': {
-                'name': b'Antoine R. Dumont (@ardumont)',
-                'email': b'antoine.romain.dumont@gmail.com',
-            },
-            'date': {
-                'timestamp': 1444054085,
-                'offset': 120,
-            },
-            'committer': {
-                'name': b'Antoine R. Dumont (@ardumont)',
-                'email': b'antoine.romain.dumont@gmail.com',
-            },
-            'committer_date': {
-                'timestamp': 1444054085,
-                'offset': 120,
-            },
-            'message': b'initial\n',
-            'type': 'tar',
-            'directory': tree_hash,
-            'parents': [],
-        }
-
-        # when
-        checksum = git.compute_revision_sha1_git(revision)
-
-        # then
-        self.assertEqual(checksum, self.checksums['commit_sha1_git'])
-
-    @istest
-    def compute_release_sha1_git(self):
-        # given
-        revision_hash = bytes.fromhex('24d012aaec0bc5a4d2f62c56399053'
-                                      'd6cc72a241')
-        release = {
-            'name': b'0.0.1',
-            'author': {
-                'name': b'Antoine R. Dumont (@ardumont)',
-                'email': b'antoine.romain.dumont@gmail.com',
-            },
-            'date': {
-                'timestamp': 1444225145,
-                'offset': 120,
-            },
-            'message': b'blah\n',
-            'target_type': 'revision',
-            'target': revision_hash,
-        }
-
-        # when
-        checksum = git.compute_release_sha1_git(release)
-
-        # then
-        self.assertEqual(checksum, self.checksums['tag_sha1_git'])
-
-
-@attr('fs')
-class ComputeBlobMetadata(unittest.TestCase):
-    @istest
-    def compute_blob_metadata__special_file_returns_nothing(self):
-        # prepare
-        tmp_root_path = tempfile.mkdtemp().encode('utf-8')
-        name = b'fifo-file'
-        path = os.path.join(tmp_root_path, name)
-
-        # given
-        os.mkfifo(path)
-
-        # when
-        actual_metadata = git.compute_blob_metadata(path)
-
-        # then
-        expected_metadata = {
-            'sha1': b'\xda9\xa3\xee^kK\r2U\xbf\xef\x95`\x18\x90\xaf\xd8\x07\t',
-            'sha1_git': b'\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2'
-                        b'\xe4\x8cS\x91',
-            'sha256': b"\xe3\xb0\xc4B\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99o"
-                      b"\xb9$'\xaeA\xe4d\x9b\x93L\xa4\x95\x99\x1bxR\xb8U",
-            'blake2s256': b'i!z0y\x90\x80\x94\xe1\x11!\xd0B5J|\x1fU\xb6H,\xa1'
-                          b'\xa5\x1e\x1b%\r\xfd\x1e\xd0\xee\xf9',
-            'perms': git.GitPerm.BLOB,
-            'path': path,
-            'name': name,
-            'type': git.GitType.BLOB,
-            'length': 0
-        }
-
-        self.assertEquals(actual_metadata, expected_metadata)
-
-        # cleanup
-        shutil.rmtree(tmp_root_path)
-
-
-@attr('fs')
-class GitHashWalkArborescenceTree:
-    """Root class to ease walk and git hash testing without side-effecty
-    problems.
-
-    """
-    def setUp(self):
-        super().setUp()
-        self.tmp_root_path = tempfile.mkdtemp().encode('utf-8')
-        self.maxDiff = None
-
-        start_path = os.path.dirname(__file__).encode('utf-8')
-        sample_folder = os.path.join(start_path,
-                                     b'../../../..',
-                                     b'swh-storage-testdata',
-                                     b'dir-folders',
-                                     b'sample-folder.tgz')
-
-        self.root_path = os.path.join(self.tmp_root_path, b'sample-folder')
-
-        # uncompress the sample folder
-        subprocess.check_output(
-            ['tar', 'xvf', sample_folder, '-C', self.tmp_root_path])
-
-    def tearDown(self):
-        if os.path.exists(self.tmp_root_path):
-            shutil.rmtree(self.tmp_root_path)
-
-
-class GitHashFromScratch(GitHashWalkArborescenceTree, unittest.TestCase):
-    """Test the main `walk_and_compute_sha1_from_directory` algorithm that
-    scans and compute the disk for checksums.
-
-    """
-    @istest
-    def walk_and_compute_sha1_from_directory(self):
-        # make a temporary arborescence tree to hash without ignoring anything
-        # same as previous behavior
-        walk0 = git.walk_and_compute_sha1_from_directory(self.tmp_root_path)
-
-        keys0 = list(walk0.keys())
-        path_excluded = os.path.join(self.tmp_root_path,
-                                     b'sample-folder',
-                                     b'foo')
-        self.assertTrue(path_excluded in keys0)  # it is not excluded here
-
-        # make the same temporary arborescence tree to hash with ignoring one
-        # folder foo
-        walk1 = git.walk_and_compute_sha1_from_directory(
-            self.tmp_root_path,
-            dir_ok_fn=lambda dirpath: b'sample-folder/foo' not in dirpath)
-        keys1 = list(walk1.keys())
-        self.assertTrue(path_excluded not in keys1)
-
-        # remove the keys that can't be the same (due to hash definition)
-        # Those are the top level folders
-        keys_diff = [self.tmp_root_path,
-                     os.path.join(self.tmp_root_path, b'sample-folder'),
-                     git.ROOT_TREE_KEY]
-        for k in keys_diff:
-            self.assertNotEquals(walk0[k], walk1[k])
-
-        # The remaining keys (bottom path) should have exactly the same hashes
-        # as before
-        keys = set(keys1) - set(keys_diff)
-        actual_walk1 = {}
-        for k in keys:
-            self.assertEquals(walk0[k], walk1[k])
-            actual_walk1[k] = walk1[k]
-
-        expected_checksums = {
-            os.path.join(self.tmp_root_path, b'sample-folder/empty-folder'): [],                                                      # noqa
-            os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo'): [{                                                         # noqa
-                'type': git.GitType.BLOB,                                                                                             # noqa
-                'length': 72,
-                'sha256': b'=\xb5\xae\x16\x80U\xbc\xd9:M\x08(]\xc9\x9f\xfe\xe2\x883\x03\xb2?\xac^\xab\x85\x02s\xa8\xeaUF',            # noqa
-                'name': b'another-quote.org',                                                                                         # noqa
-                'path': os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo/another-quote.org'),                              # noqa
-                'perms': git.GitPerm.BLOB,                                                                                            # noqa
-                'sha1': b'\x90\xa6\x13\x8b\xa5\x99\x15&\x1e\x17\x99H8j\xa1\xcc*\xa9"\n',                                              # noqa
-                'blake2s256': b'\xd2l\x1c\xad\x82\xd4=\xf0\xbf\xfa^{\xe1\x1a`\xe3J\xdb\x85\xa2\x18\xb43\xcb\xceRx\xb1\x0b\x95O\xe8',  # noqa
-                'sha1_git': b'\x136\x93\xb1%\xba\xd2\xb4\xac1\x855\xb8I\x01\xeb\xb1\xf6\xb68'}],                                      # noqa
-            os.path.join(self.tmp_root_path, b'sample-folder/bar'): [{                                                                # noqa
-                'type': git.GitType.TREE,                                                                                             # noqa
-                'perms': git.GitPerm.TREE,                                                                                            # noqa
-                'name': b'barfoo',                                                                                                    # noqa
-                'path': os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo'),                                                # noqa
-                'sha1_git': b'\xc3\x02\x0fk\xf15\xa3\x8cm\xf3\xaf\xeb_\xb3\x822\xc5\xe0p\x87'}]}                                      # noqa
-
-        self.assertEquals(actual_walk1, expected_checksums)
-
-    @istest
-    def walk_and_compute_sha1_from_directory_without_root_tree(self):
-        # compute the full checksums
-        expected_hashes = git.walk_and_compute_sha1_from_directory(
-            self.tmp_root_path)
-
-        # except for the key on that round
-        actual_hashes = git.walk_and_compute_sha1_from_directory(
-            self.tmp_root_path,
-            with_root_tree=False)
-
-        # then, removing the root tree hash from the first round
-        del expected_hashes[git.ROOT_TREE_KEY]
-
-        # should give us the same checksums as the second round
-        self.assertEquals(actual_hashes, expected_hashes)
-
-
-class WithSampleFolderChecksums:
-    def setUp(self):
-        super().setUp()
-
-        self.rootkey = b'/tmp/tmp7w3oi_j8'
-
-        self.objects = {
-            b'/tmp/tmp7w3oi_j8': {
-                'children': {b'/tmp/tmp7w3oi_j8/sample-folder'},
-                'checksums': {
-                    'type': git.GitType.TREE,
-                    'name': b'tmp7w3oi_j8',
-                    'sha1_git': b'\xa7A\xfcM\x96\x8c{\x8e<\x94\xff\x86\xe7\x04\x80\xc5\xc7\xe5r\xa9',  # noqa
-                    'path': b'/tmp/tmp7w3oi_j8',
-                    'perms': git.GitPerm.TREE
-                },
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder': {
-                'children': {
-                    b'/tmp/tmp7w3oi_j8/sample-folder/empty-folder',
-                    b'/tmp/tmp7w3oi_j8/sample-folder/link-to-binary',
-                    b'/tmp/tmp7w3oi_j8/sample-folder/link-to-another-quote',
-                    b'/tmp/tmp7w3oi_j8/sample-folder/link-to-foo',
-                    b'/tmp/tmp7w3oi_j8/sample-folder/some-binary',
-                    b'/tmp/tmp7w3oi_j8/sample-folder/bar',
-                    b'/tmp/tmp7w3oi_j8/sample-folder/foo',
-                },
-                'checksums': {
-                    'type': git.GitType.TREE,
-                    'name': b'sample-folder',
-                    'sha1_git': b'\xe8\xb0\xf1Fj\xf8`\x8c\x8a?\xb9\x87\x9d\xb1r\xb8\x87\xe8\x07Y',  # noqa
-                    'path': b'/tmp/tmp7w3oi_j8/sample-folder',
-                    'perms': git.GitPerm.TREE}
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/empty-folder': {
-                'children': {},
-                'checksums': {
-                    'type': git.GitType.TREE,
-                    'name': b'empty-folder',
-                    'sha1_git': b'K\x82]\xc6B\xcbn\xb9\xa0`\xe5K\xf8\xd6\x92\x88\xfb\xeeI\x04',  # noqa
-                    'path': b'/tmp/tmp7w3oi_j8/sample-folder/empty-folder',
-                    'perms': git.GitPerm.TREE
-                }
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/link-to-binary': {
-                'checksums': {
-                    'name': b'link-to-binary',
-                    'sha1': b'\xd0$\x87\x14\x94\x8b:H\xa2T8#*o\x99\xf01\x8fY\xf1',  # noqa
-                    'data': b'some-binary',
-                    'sha1_git': b'\xe8kE\xe58\xd9\xb6\x88\x8c\x96\x9c\x89\xfb\xd2*\x85\xaa\x0e\x03f',  # noqa
-                    'blake2s256': b'\x9c\xe1\x8b\x1a\xde\xcb3\xf8\x91\xca6fM\xa6v\xe1,w,\xc1\x93w\x8a\xac\x9a\x13{\x8d\xc5\x83K\x9b',  # noqa
-                    'path': b'/tmp/tmp7w3oi_j8/sample-folder/link-to-binary',
-                    'sha256': b'\x14\x12n\x97\xd8?}&\x1cZh\x89\xce\xe76\x19w\x0f\xf0\x9e@\xc5I\x86\x85\xab\xa7E\xbe\x88.\xff',  # noqa
-                    'perms': git.GitPerm.LINK,
-                    'type': git.GitType.BLOB,
-                    'length': 11
-                }
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/link-to-another-quote': {
-                'checksums': {
-                    'name': b'link-to-another-quote',
-                    'sha1': b'\xcb\xee\xd1^yY\x9c\x90\xdes\x83\xf4 \xfe\xd7\xac\xb4\x8e\xa1q',  # noqa
-                    'data': b'bar/barfoo/another-quote.org',
-                    'sha1_git': b'}\\\x08\x11\x1e!\xc8\xa9\xf7\x15@\x93\x99\x98U\x16\x837_\xad',  # noqa
-                    'blake2s256': b"-\x0es\xce\xa0\x1b\xa9I\xc1\x02-\xc1\x0c\x8aC\xe6a\x80c\x96b\xe5\xdc'7\xb8C8/{\x19\x10",  # noqa
-                    'path': b'/tmp/tmp7w3oi_j8/sample-folder/link-to-another-quote',  # noqa
-                    'sha256': b'\xe6\xe1}\x07\x93\xaau\n\x04@\xeb\x9a\xd5\xb8\x0b%\x80vc~\xf0\xfbh\xf3\xac.Y\xe4\xb9\xac;\xa6',  # noqa
-                    'perms': git.GitPerm.LINK,
-                    'type': git.GitType.BLOB,
-                    'length': 28
-                }
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/link-to-foo': {
-                'checksums': {
-                    'name': b'link-to-foo',
-                    'sha1': b'\x0b\xee\xc7\xb5\xea?\x0f\xdb\xc9]\r\xd4\x7f<[\xc2u\xda\x8a3',  # noqa
-                    'data': b'foo',
-                    'sha1_git': b'\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c',  # noqa
-                    'blake2s256': b'\x08\xd6\xca\xd8\x80u\xde\x8f\x19-\xb0\x97W=\x0e\x82\x94\x11\xcd\x91\xebn\xc6^\x8f\xc1l\x01~\xdf\xdbt',  # noqa
-                    'path': b'/tmp/tmp7w3oi_j8/sample-folder/link-to-foo',
-                    'sha256': b',&\xb4kh\xff\xc6\x8f\xf9\x9bE<\x1d0A4\x13B-pd\x83\xbf\xa0\xf9\x8a^\x88bf\xe7\xae',  # noqa
-                    'perms': git.GitPerm.LINK,
-                    'type': git.GitType.BLOB,
-                    'length': 3
-                }
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/some-binary': {
-                'checksums': {
-                    'name': b'some-binary',
-                    'sha1': b'\x0b\xbc\x12\xd7\xf4\xa2\xa1[\x14=\xa8F\x17\xd9\\\xb2#\xc9\xb2<',  # noqa
-                    'sha1_git': b'hv\x95y\xc3\xea\xad\xbeUSy\xb9\xc3S\x8ef(\xba\xe1\xeb',  # noqa
-                    'blake2s256': b"\x92.\x0fp\x15\x03R\x12I[\t\x0c'WsW\xa7@\xdd\xd7{\x0b\x9e\x0c\xd2;T\x80\xc0z\x18\xc6",  # noqa
-                    'path': b'/tmp/tmp7w3oi_j8/sample-folder/some-binary',
-                    'sha256': b'\xba\xc6P\xd3Jv8\xbb\n\xebSBdm$\xe3\xb9\xadkD\xc9\xb3\x83b\x1f\xaaH+\x99\n6}',  # noqa
-                    'perms': git.GitPerm.EXEC,
-                    'type': git.GitType.BLOB,
-                    'length': 5}
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/bar': {
-                'children': {b'/tmp/tmp7w3oi_j8/sample-folder/bar/barfoo'},
-                'checksums': {'type': git.GitType.TREE,
-                              'name': b'bar',
-                              'sha1_git': b'<\x1fW\x83\x94\xf4b?t\xa0\xba\x7f\xe7ar\x9fY\xfcn\xc4',  # noqa
-                              'path': b'/tmp/tmp7w3oi_j8/sample-folder/bar',
-                              'perms': git.GitPerm.TREE},
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/bar/barfoo': {
-                'children': {b'/tmp/tmp7w3oi_j8/sample-folder/bar/barfoo/another-quote.org'},  # noqa
-                'checksums': {'type': git.GitType.TREE,
-                              'name': b'barfoo',
-                              'sha1_git': b'\xc3\x02\x0fk\xf15\xa3\x8cm\xf3\xaf\xeb_\xb3\x822\xc5\xe0p\x87',  # noqa
-                              'path': b'/tmp/tmp7w3oi_j8/sample-folder/bar/barfoo',  # noqa
-                              'perms': git.GitPerm.TREE},
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/bar/barfoo/another-quote.org': {
-                'checksums': {'name': b'another-quote.org',
-                              'sha1': b'\x90\xa6\x13\x8b\xa5\x99\x15&\x1e\x17\x99H8j\xa1\xcc*\xa9"\n',  # noqa
-                              'sha1_git': b'\x136\x93\xb1%\xba\xd2\xb4\xac1\x855\xb8I\x01\xeb\xb1\xf6\xb68',  # noqa
-                              'blake2s256': b'\xd2l\x1c\xad\x82\xd4=\xf0\xbf\xfa^{\xe1\x1a`\xe3J\xdb\x85\xa2\x18\xb43\xcb\xceRx\xb1\x0b\x95O\xe8',  # noqa
-                              'path': b'/tmp/tmp7w3oi_j8/sample-folder/bar/barfoo/another-quote.org',  # noqa
-                              'sha256': b'=\xb5\xae\x16\x80U\xbc\xd9:M\x08(]\xc9\x9f\xfe\xe2\x883\x03\xb2?\xac^\xab\x85\x02s\xa8\xeaUF',  # noqa
-                              'perms': git.GitPerm.BLOB,
-                              'type': git.GitType.BLOB,
-                              'length': 72}
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/foo': {
-                'children': {
-                    b'/tmp/tmp7w3oi_j8/sample-folder/foo/barfoo',
-                    b'/tmp/tmp7w3oi_j8/sample-folder/foo/rel-link-to-barfoo',
-                    b'/tmp/tmp7w3oi_j8/sample-folder/foo/quotes.md',
-                },
-                'checksums': {'type': git.GitType.TREE,
-                              'name': b'foo',
-                              'sha1_git': b'+A\xc4\x0f\r\x1f\xbf\xfc\xba\x12I}\xb7\x1f\xba\x83\xfc\xca\x96\xe5',  # noqa
-                              'path': b'/tmp/tmp7w3oi_j8/sample-folder/foo',
-                              'perms': git.GitPerm.TREE}
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/foo/barfoo': {
-                'checksums': {'name': b'barfoo',
-                              'sha1': b'\x90W\xeem\x01bPn\x01\xc4\xd9\xd5E\x9az\xdd\x1f\xed\xac7',  # noqa
-                              'data': b'bar/barfoo',
-                              'sha1_git': b'\x81\x85\xdf\xb2\xc0\xc2\xc5\x97\xd1ou\xa8\xa0\xc3vhV|=~',  # noqa
-                              'blake2s256': b'\xe1%/,\xaaJre<N\xfd\x9a\xf8q\xb6+\xf2\xab\xb7\xbb/\x1b\x0e\x95\x96\x92\x04\xbd\x8ap\xd4\xcd',  # noqa
-                              'path': b'/tmp/tmp7w3oi_j8/sample-folder/foo/barfoo',  # noqa
-                              'sha256': b')\xad?W%2\x1b\x94\x032\xc7\x8e@6\x01\xaf\xffa\xda\xea\x85\xe9\xc8\x0bJpc\xb6\x88~\xadh',  # noqa
-                              'perms': git.GitPerm.LINK,
-                              'type': git.GitType.BLOB,
-                              'length': 10}
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/foo/rel-link-to-barfoo': {
-                'checksums': {'name': b'rel-link-to-barfoo',
-                              'sha1': b'\xdcQ"\x1d0\x8f:\xeb\'T\xdbH9\x1b\x85h|(i\xf4',  # noqa
-                              'data': b'../bar/barfoo',
-                              'sha1_git': b'\xac\xac2m\xddc\xb0\xbcp\x84\x06Y\xd4\xacCa\x94\x84\xe6\x9f',  # noqa
-                              'path': b'/tmp/tmp7w3oi_j8/sample-folder/foo/rel-link-to-barfoo',  # noqa
-                              'sha256': b'\x80\x07\xd2\r\xb2\xaf@C_B\xdd\xefK\x8a\xd7k\x80\xad\xbe\xc2k$\x9f\xdf\x04s5?\x8d\x99\xdf\x08',  # noqa
-                              'blake2s256': b"\xd9\xc3'B\x15\x88\xa1\xcfa\xf3\x16aP\x05\xa2\xe9\xc1:\xc3\xa4\xe9mC\xa2A8\xd7\x18\xfa\x0e0\xdb",  # noqa
-                              'perms': git.GitPerm.LINK,
-                              'type': git.GitType.BLOB,
-                              'length': 13}
-            },
-            b'/tmp/tmp7w3oi_j8/sample-folder/foo/quotes.md': {
-                'checksums': {'name': b'quotes.md',
-                              'sha1': b'\x1b\xf0\xbbr\x1a\xc9,\x18\xa1\x9b\x13\xc0\xeb=t\x1c\xbf\xad\xeb\xfc',  # noqa
-                              'sha1_git': b'|LW\xba\x9f\xf4\x96\xad\x17\x9b\x8fe\xb1\xd2\x86\xed\xbd\xa3L\x9a',  # noqa
-                              'blake2s256': b'\xbf|\xe4\xfe0Cxe\x1e\xe64\x8d>\x936\xedZ\xd6\x03\xd3>\x83\xc8;\xa4\xe1KF\xf9\xb8\xa8\x0b',  # noqa
-                              'path': b'/tmp/tmp7w3oi_j8/sample-folder/foo/quotes.md',  # noqa
-                              'sha256': b'\xca\xca\x94*\xed\xa7\xb3\x08\x85\x9e\xb5o\x90\x9e\xc9m\x07\xa4\x99I\x16\x90\xc4S\xf7;\x98\x00\xa9;\x16Y',  # noqa
-                              'perms': git.GitPerm.BLOB,
-                              'type': git.GitType.BLOB,
-                              'length': 66}
-            },
-        }
-
-
-class TestObjectsPerType(WithSampleFolderChecksums, unittest.TestCase):
-    @istest
-    def objects_per_type_blob(self):
-        # given
-        expected_blobs = [
-            {
-                'name': b'another-quote.org',
-                'sha1': b'\x90\xa6\x13\x8b\xa5\x99\x15&\x1e\x17\x99H8j\xa1\xcc*\xa9"\n',  # noqa
-                'sha1_git': b'\x136\x93\xb1%\xba\xd2\xb4\xac1\x855\xb8I\x01\xeb\xb1\xf6\xb68',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/bar/barfoo/another-quote.org',  # noqa
-                'sha256': b'=\xb5\xae\x16\x80U\xbc\xd9:M\x08(]\xc9\x9f\xfe\xe2\x883\x03\xb2?\xac^\xab\x85\x02s\xa8\xeaUF',  # noqa
-                'perms': git.GitPerm.BLOB,
-                'type': git.GitType.BLOB,
-                'length': 72
-            },
-            {
-                'name': b'link-to-binary',
-                'sha1': b'\xd0$\x87\x14\x94\x8b:H\xa2T8#*o\x99\xf01\x8fY\xf1',
-                'data': b'some-binary',
-                'sha1_git': b'\xe8kE\xe58\xd9\xb6\x88\x8c\x96\x9c\x89\xfb\xd2*\x85\xaa\x0e\x03f',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/link-to-binary',
-                'sha256': b'\x14\x12n\x97\xd8?}&\x1cZh\x89\xce\xe76\x19w\x0f\xf0\x9e@\xc5I\x86\x85\xab\xa7E\xbe\x88.\xff',  # noqa
-                'perms': git.GitPerm.LINK,
-                'type': git.GitType.BLOB,
-                'length': 11
-            },
-            {
-                'name': b'link-to-another-quote',
-                'sha1': b'\xcb\xee\xd1^yY\x9c\x90\xdes\x83\xf4 \xfe\xd7\xac\xb4\x8e\xa1q',  # noqa
-                'data': b'bar/barfoo/another-quote.org',
-                'sha1_git': b'}\\\x08\x11\x1e!\xc8\xa9\xf7\x15@\x93\x99\x98U\x16\x837_\xad',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/link-to-another-quote',  # noqa
-                'sha256': b'\xe6\xe1}\x07\x93\xaau\n\x04@\xeb\x9a\xd5\xb8\x0b%\x80vc~\xf0\xfbh\xf3\xac.Y\xe4\xb9\xac;\xa6',  # noqa
-                'perms': git.GitPerm.LINK,
-                'type': git.GitType.BLOB,
-                'length': 28
-            },
-            {
-                'name': b'link-to-foo',
-                'sha1': b'\x0b\xee\xc7\xb5\xea?\x0f\xdb\xc9]\r\xd4\x7f<[\xc2u\xda\x8a3',  # noqa
-                'data': b'foo',
-                'sha1_git': b'\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/link-to-foo',
-                'sha256': b',&\xb4kh\xff\xc6\x8f\xf9\x9bE<\x1d0A4\x13B-pd\x83\xbf\xa0\xf9\x8a^\x88bf\xe7\xae',  # noqa
-                'perms': git.GitPerm.LINK,
-                'type': git.GitType.BLOB,
-                'length': 3
-            },
-            {
-                'name': b'some-binary',
-                'sha1': b'\x0b\xbc\x12\xd7\xf4\xa2\xa1[\x14=\xa8F\x17\xd9\\\xb2#\xc9\xb2<',  # noqa
-                'sha1_git': b'hv\x95y\xc3\xea\xad\xbeUSy\xb9\xc3S\x8ef(\xba\xe1\xeb',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/some-binary',
-                'sha256': b'\xba\xc6P\xd3Jv8\xbb\n\xebSBdm$\xe3\xb9\xadkD\xc9\xb3\x83b\x1f\xaaH+\x99\n6}',  # noqa
-                'perms': git.GitPerm.EXEC,
-                'type': git.GitType.BLOB,
-                'length': 5
-            },
-            {
-                'name': b'barfoo',
-                'sha1': b'\x90W\xeem\x01bPn\x01\xc4\xd9\xd5E\x9az\xdd\x1f\xed\xac7',  # noqa
-                'data': b'bar/barfoo',
-                'sha1_git': b'\x81\x85\xdf\xb2\xc0\xc2\xc5\x97\xd1ou\xa8\xa0\xc3vhV|=~',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/foo/barfoo',
-                'sha256': b')\xad?W%2\x1b\x94\x032\xc7\x8e@6\x01\xaf\xffa\xda\xea\x85\xe9\xc8\x0bJpc\xb6\x88~\xadh',  # noqa
-                'perms': git.GitPerm.LINK,
-                'type': git.GitType.BLOB,
-                'length': 10
-            },
-            {
-                'name': b'rel-link-to-barfoo',
-                'sha1': b'\xdcQ"\x1d0\x8f:\xeb\'T\xdbH9\x1b\x85h|(i\xf4',
-                'data': b'../bar/barfoo',
-                'sha1_git': b'\xac\xac2m\xddc\xb0\xbcp\x84\x06Y\xd4\xacCa\x94\x84\xe6\x9f',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/foo/rel-link-to-barfoo',  # noqa
-                'sha256': b'\x80\x07\xd2\r\xb2\xaf@C_B\xdd\xefK\x8a\xd7k\x80\xad\xbe\xc2k$\x9f\xdf\x04s5?\x8d\x99\xdf\x08',  # noqa
-                'perms': git.GitPerm.LINK,
-                'type': git.GitType.BLOB,
-                'length': 13
-            },
-            {
-                'name': b'quotes.md',
-                'sha1': b'\x1b\xf0\xbbr\x1a\xc9,\x18\xa1\x9b\x13\xc0\xeb=t\x1c\xbf\xad\xeb\xfc',  # noqa
-                'sha1_git': b'|LW\xba\x9f\xf4\x96\xad\x17\x9b\x8fe\xb1\xd2\x86\xed\xbd\xa3L\x9a',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/foo/quotes.md',
-                'sha256': b'\xca\xca\x94*\xed\xa7\xb3\x08\x85\x9e\xb5o\x90\x9e\xc9m\x07\xa4\x99I\x16\x90\xc4S\xf7;\x98\x00\xa9;\x16Y',  # noqa
-                'perms': git.GitPerm.BLOB,
-                'type': git.GitType.BLOB,
-                'length': 66
-            },
-        ]
-
-        expected_sha1_blobs = set(
-            ((c['sha1_git'], git.GitType.BLOB) for c in expected_blobs))
-
-        # when
-        actual_sha1_blobs = set(
-            ((c['sha1_git'], c['type'])
-             for c in git.objects_per_type(git.GitType.BLOB, self.objects)))
-
-        # then
-        self.assertEqual(actual_sha1_blobs, expected_sha1_blobs)
-
-    @istest
-    def objects_per_type_tree(self):
-        def _children_hashes(path, objects=self.objects):
-            return set((c['sha1_git']
-                       for c in git.children_hashes(
-                           objects[path]['children'], objects)))
-
-        expected_trees = [
-            {
-                'type': git.GitType.TREE,
-                'name': b'tmp7w3oi_j8',
-                'sha1_git': b'\xa7A\xfcM\x96\x8c{\x8e<\x94\xff\x86\xe7\x04\x80\xc5\xc7\xe5r\xa9',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8',
-                'perms': git.GitPerm.TREE,
-                # we only add children's sha1_git here, in reality,
-                # it's a full dict of hashes.
-                'children': _children_hashes(b'/tmp/tmp7w3oi_j8')
-            },
-            {
-                'type': git.GitType.TREE,
-                'name': b'sample-folder',
-                'sha1_git': b'\xe8\xb0\xf1Fj\xf8`\x8c\x8a?\xb9\x87\x9d\xb1r\xb8\x87\xe8\x07Y',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder',
-                'perms': git.GitPerm.TREE,
-                'children': _children_hashes(
-                    b'/tmp/tmp7w3oi_j8/sample-folder')
-            },
-            {
-                'type': git.GitType.TREE,
-                'name': b'empty-folder',
-                'sha1_git': b'K\x82]\xc6B\xcbn\xb9\xa0`\xe5K\xf8\xd6\x92\x88\xfb\xeeI\x04',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/empty-folder',
-                'perms': git.GitPerm.TREE,
-                'children': _children_hashes(
-                    b'/tmp/tmp7w3oi_j8/sample-folder/empty-folder')
-            },
-            {
-                'type': git.GitType.TREE,
-                'name': b'bar',
-                'sha1_git': b'<\x1fW\x83\x94\xf4b?t\xa0\xba\x7f\xe7ar\x9fY\xfcn\xc4',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/bar',
-                'perms': git.GitPerm.TREE,
-                'children': _children_hashes(
-                    b'/tmp/tmp7w3oi_j8/sample-folder/bar')
-            },
-            {
-                'type': git.GitType.TREE,
-                'name': b'barfoo',
-                'sha1_git': b'\xc3\x02\x0fk\xf15\xa3\x8cm\xf3\xaf\xeb_\xb3\x822\xc5\xe0p\x87',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/bar/barfoo',
-                'perms': git.GitPerm.TREE,
-                'children': _children_hashes(
-                    b'/tmp/tmp7w3oi_j8/sample-folder/bar/barfoo'),
-            },
-            {
-                'type': git.GitType.TREE,
-                'name': b'foo',
-                'sha1_git': b'+A\xc4\x0f\r\x1f\xbf\xfc\xba\x12I}\xb7\x1f\xba\x83\xfc\xca\x96\xe5',  # noqa
-                'path': b'/tmp/tmp7w3oi_j8/sample-folder/foo',
-                'perms': git.GitPerm.TREE,
-                'children': _children_hashes(
-                    b'/tmp/tmp7w3oi_j8/sample-folder/foo')
-            },
-        ]
-        expected_sha1_trees = list(
-            ((c['sha1_git'], git.GitType.TREE, c['children'])
-             for c in expected_trees))
-
-        # when
-        actual_sha1_trees = list(
-            ((c['sha1_git'], c['type'], _children_hashes(c['path']))
-             for c in git.objects_per_type(git.GitType.TREE, self.objects)))
-
-        self.assertEquals(len(actual_sha1_trees), len(expected_sha1_trees))
-        for e in actual_sha1_trees:
-            self.assertTrue(e in expected_sha1_trees)
-
-
-class TestComputeHashesFromDirectory(WithSampleFolderChecksums,
-                                     GitHashWalkArborescenceTree,
-                                     unittest.TestCase):
-
-    def __adapt_object_to_rootpath(self, rootpath):
-        def _replace_slash(s,
-                           rootpath=self.rootkey,
-                           newrootpath=rootpath):
-            return s.replace(rootpath, newrootpath)
-
-        def _update_children(children):
-            return set((_replace_slash(c) for c in children))
-
-        # given
-        expected_objects = {}
-        for path, v in self.objects.items():
-            p = _replace_slash(path)
-            v['checksums']['path'] = _replace_slash(v['checksums']['path'])
-            v['checksums']['name'] = os.path.basename(v['checksums']['path'])
-            if 'children' in v:
-                v['children'] = _update_children(v['children'])
-            expected_objects[p] = v
-
-        return expected_objects
-
-    @istest
-    def compute_hashes_from_directory_default(self):
-        # given
-        expected_objects = self.__adapt_object_to_rootpath(self.tmp_root_path)
-
-        # when
-        actual_hashes = git.compute_hashes_from_directory(self.tmp_root_path)
-
-        # then
-        self.assertEquals(actual_hashes, expected_objects)
-
-    @istest
-    def compute_hashes_from_directory_no_empty_folder(self):
-        # given
-        def _replace_slash(s,
-                           rootpath=self.rootkey,
-                           newrootpath=self.tmp_root_path):
-            return s.replace(rootpath, newrootpath)
-
-        expected_objects = self.__adapt_object_to_rootpath(self.tmp_root_path)
-
-        # when
-        actual_hashes = git.compute_hashes_from_directory(
-            self.tmp_root_path,
-            remove_empty_folder=True)
-
-        # then
-
-        # One folder less, so plenty of hashes are different now
-        self.assertNotEquals(actual_hashes, expected_objects)
-        keys = set(actual_hashes.keys())
-
-        assert (b'/tmp/tmp7w3oi_j8/sample-folder/empty-folder'
-                in self.objects.keys())
-        new_empty_folder_path = _replace_slash(
-            b'/tmp/tmp7w3oi_j8/sample-folder/empty-folder')
-        self.assertNotIn(new_empty_folder_path, keys)
-
-        self.assertEqual(len(keys), len(expected_objects.keys()) - 1)
-
-    @istest
-    def compute_hashes_from_directory_ignore_some_folder(self):
-        # given
-        def _replace_slash(s,
-                           rootpath=self.rootkey,
-                           newrootpath=self.tmp_root_path):
-            return s.replace(rootpath, newrootpath)
-
-        ignore_path = b'/tmp/tmp7w3oi_j8/sample-folder'
-
-        # when
-        actual_hashes = git.compute_hashes_from_directory(
-            self.tmp_root_path,
-            dir_ok_fn=lambda dirpath: b'sample-folder' not in dirpath)
-
-        # then
-
-        # One entry less, so plenty of hashes are different now
-        keys = set(actual_hashes.keys())
-
-        assert ignore_path in self.objects.keys()
-
-        new_ignore_path = _replace_slash(ignore_path)
-        self.assertNotIn(new_ignore_path, keys)
-
-        # top level directory contains the folder to ignore
-        self.assertEqual(len(keys), 1)
diff --git a/swh/model/tests/test_git_slow.py b/swh/model/tests/test_git_slow.py
deleted file mode 100644
index ac5f63e899e34cadb202337e0009d1d41ba300b9..0000000000000000000000000000000000000000
--- a/swh/model/tests/test_git_slow.py
+++ /dev/null
@@ -1,404 +0,0 @@
-# Copyright (C) 2015  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import unittest
-
-from nose.tools import istest
-from nose.plugins.attrib import attr
-
-from swh.model import hashutil
-
-from swh.model import git
-
-
-_type_to_git_type = {
-    'blob': git.GitType.BLOB,
-    'tree': git.GitType.TREE,
-}
-
-
-_perms_to_git_perm = {
-    '100644': git.GitPerm.BLOB,
-    '120000': git.GitPerm.LINK,
-    '040000': git.GitPerm.TREE,
-    '100755': git.GitPerm.EXEC
-}
-
-
-def to_bytes(path):
-    """Convert the string to bytes.
-
-    """
-    return path.encode('utf-8', errors='surrogateescape')
-
-
-def to_hash_data_entry(ls_tree_format_input_line):
-    def prepare_str(s):
-        return s.strip().replace('\t', ' ').replace('    ', ' ')
-
-    prepared_str = prepare_str(ls_tree_format_input_line)
-    perms, type, sha1_git, name = prepared_str.split(' ')
-    return {'perms': _perms_to_git_perm[perms],
-            'name': to_bytes(name),
-            'type': _type_to_git_type[type],
-            'sha1_git': bytes.fromhex(sha1_git)}
-
-
-def to_hash_data(path, ls_tree_format_input):
-    entry_lines = ls_tree_format_input.strip().split('\n')
-    return {path: list(map(to_hash_data_entry, entry_lines))}
-
-
-def compute_tree_hash(dirpath, ls_tree_format_input, hex_output):
-    hashes = to_hash_data(dirpath, ls_tree_format_input)
-    bin_hash = git.compute_directory_git_sha1(dirpath, hashes)
-    return hashutil.hash_to_hex(bin_hash)
-
-
-@attr('slow')
-class GitHashTreelib(unittest.TestCase):
-    def setUp(self):
-        self.to_checks = {
-            'e8014cb75cfe9fdb4603ce869eeeb12c53e646d9': """
-040000 tree a1e4db2944541e47088e72830464c2ffd3935f47    testing
-040000 tree f9375bba7c6d1aabec5ff90b0af53af526b7fc0d    obsolete
-100644 blob 1fafc4b0753b4eedf0bc00351286ff864745ab07    README
-040000 tree 30d8382c42e9fd66f332d2bebfa44d044afe9d95    removed
-040000 tree f3b14ca3821d7d2839713925642261e892270c88    stable
-    """,
-    '30d8382c42e9fd66f332d2bebfa44d044afe9d95': """
-100644 blob a173aecc2f18aedddf1c9882808654febffe0d20    net_dma
-100644 blob 0020c49933c45ab0b61cd7e57fa9b4baa672d3c0    devfs
-100644 blob c2310b6676f4c78be0a8f8b46ed45a126ca5e57a    dv1394
-100644 blob 3243613bc2d2095c86fdd878236dfe08ed0cfe84    ip_queue
-100644 blob 20c91adca6d412102dabf73d6b6f387a60d888ec    o2cb
-100644 blob ec333e67632266a935daa6e2124744c09caa8d77    raw1394
-100644 blob c39c25aee77b13e6d92e46686000ac2d8978da51    video1394
-    """,
-
-    'f3b14ca3821d7d2839713925642261e892270c88': """
-100644 blob 16d030827368b2c49cbbe396588635dfa69d6c08    firewire-cdev
-100644 blob 5eb1545e0b8d2aea38138d8ff43f4045a6b6f729    o2cb
-100644 blob c3ae3e7d6a0ccdddedcc61db54910fef59dd54d3    syscalls
-100644 blob 964c7a8afb268ae004364b0d71117efa51261dc3    sysfs-acpi-pmprofile
-100644 blob 41e5a0cd1e3ed334234c4f3e9e3db1e2fa021dfc    sysfs-bus-firewire
-100644 blob 831f15d9672f29e90cca5650356d2f69599e14b8    sysfs-bus-usb
-100644 blob 140d85b4ae92faff6d3646735b04974c530c604b    sysfs-bus-w1
-100644 blob 3d5951c8bf5fe8b27f47b016289c910f90af97e6    sysfs-bus-xen-backend
-100644 blob 70302f370e7ec1c1d46e4d278f41319e1ce536c1    sysfs-class-backlight
-100644 blob 097f522c33bb7b5c3632a9ca200e099fea32b2cf    sysfs-class-rfkill
-100644 blob 26579ee868c9374ba92d3c1121c94310aacc38b4    sysfs-driver-w1_ds28e04
-100644 blob 9f790eebb5d2b0f4d35c317073c72791b41a20b3    sysfs-class-tpm
-100644 blob 18d471d9faea9bdec594a5bada594b4062ab66fb    sysfs-class-ubi
-100644 blob 85d3dac2e204cfb649969ec6f7570522fb59ed4a    sysfs-class-udc
-100644 blob 43f78b88da28beaa556b3bba509f1ac97fa44c16    sysfs-devices
-100644 blob 5b2d0f08867cd899df072f89a059995944fb8eec    sysfs-devices-node
-100644 blob 33c133e2a631a0d390353f76e4ad0697a568c60f    sysfs-devices-system-cpu
-100644 blob caa311d59ac1d24c92643f37b396407a1ab654f0    sysfs-devices-system-xen_memory
-100644 blob 7049a2b5035950f3d08dc9e8595a7d40e73036e6    sysfs-driver-ib_srp
-100644 blob 9a59d84497edb7c7600e546542b3f4dfbccbe1d2    sysfs-driver-qla2xxx
-100644 blob e960cd027e1e9685a83f3275ca859da7a793e116    sysfs-driver-usb-usbtmc
-100644 blob e928def14f28c7487e7a319f94a9c1527aaecd8d    sysfs-driver-w1_ds28ea00
-100644 blob 5def20b9019e93299ed111d53c338e705b1e2639    sysfs-firmware-efi-vars
-100644 blob 32fe7f5c488069c64b8c37951b6dfcfa90f4eb57    sysfs-firmware-opal-dump
-100644 blob e1f3058f5954d062796d12feb153d5d025c38495    sysfs-firmware-opal-elog
-100644 blob 6272ae5fb36699b9f47276c85ec313185e43a9cf    sysfs-module
-100644 blob 636e938d5e33a4e9331a328a05a6b93a0b538e60    sysfs-bus-vmbus
-100644 blob ec7af69fea0afd9fe57f6600adc6b9be8fceb90d    sysfs-transport-srp
-100644 blob 9723e8b7aeb3125b352b75bc54a0ad0ea7aa2474    thermal-notification
-100644 blob 7cdfc28cc2c6d93b861d6ec3acb05bc5aca8bc70    vdso
-    """,  # NOQA
-
-    '367b37ab86e8066a46ed8ed81b37e78138aeb7d5': """
-    100644 blob 8b7c72f07c92fe87cc7170ecc4fd1edf80fe7791    .gitignore
-    100644 blob 06871b0c08a6e9fb5d38f5b1e4d5dfb90135f2f2    Makefile
-    100755 blob 8f2629b41c5f5167be37fd3e7dee74dc9b67d2a6    micctrl
-    100755 blob 582aad4811ae802844ebeb37d51cc9a1ffec68a8    mpss
-    100644 blob 3c5c379fc29d6797d0ce17a837cbda64278f68b3    mpssd.c
-    100644 blob f5f18b15d9a057cc6e8d5d1b007424da4d765c0b    mpssd.h
-    100644 blob 8dd32693608357df350619a8da6668fb3241afd9    sysfs.c
-    """,
-            '1f4fa162adf287b4fa3fb762cf54dafc0e671f57': """
-100644 blob cd077ca0e1b86dfba53b3bd2d0fa62724eb24eb4	00-INDEX
-040000 tree e8014cb75cfe9fdb4603ce869eeeb12c53e646d9	ABI
-100644 blob 65022a87bf17902f9e04fe5ecff611a41ffaf4d8	BUG-HUNTING
-100644 blob f447f0516f074c700b0c78ca87fcfcf4595ea49f	Changes
-100644 blob 1684d0b4efa65880a36d0fb00cc5bff747c3e83a	CodeOfConflict
-100644 blob c06f817b3091cdb6e4be6e91dbbb98210177b370	CodingStyle
-100644 blob 55b70b903ead2e95ce1226ef0fec3612bea67189	DMA-API-HOWTO.txt
-100644 blob edccacd4f048a13e8afdb63db7d98ad41667a503	DMA-API.txt
-100644 blob b1a19835e9070dbec2f6dba3d735b8cda23abd6e	DMA-ISA-LPC.txt
-100644 blob 18dc52c4f2a0b13a42d9867c36c94f4774bf58e2	DMA-attributes.txt
-040000 tree 57c2bd8f00655df1d9ecbeab3a6b265279ae433a	DocBook
-040000 tree 902e0d5f0930c22be9b4b6dfe984fe6048626784	EDID
-100644 blob 21152d397b88ecbe45bca161444fcee38158e96b	HOWTO
-100644 blob 31d1d658827f082f66c88c3147e99be3321635cf	IPMI.txt
-100644 blob 01a675175a3674ef88a08ebb4f430dca3a4e4ec2	IRQ-affinity.txt
-100644 blob 3a8e15cba816a4ea16fb0208518046214ebff1e6	IRQ-domain.txt
-100644 blob 1011e717502162c63a04245169ac05d8f96a895a	IRQ.txt
-100644 blob 7b57fc087088f49756eeb8eaabf403bfbbd92b93	Intel-IOMMU.txt
-100644 blob bc0548201755e1a8d29614bccbd78fcbbe5a34ae	Makefile
-100644 blob a211ee8d8b447354ac3758d2f6f50b901aa41ea0	ManagementStyle
-040000 tree 5dc5d1e6756e3547edf8fd663f81ca353545df9d	PCI
-040000 tree 7bb4565fcf075c6906c4256b4aab7915c4779ee8	RCU
-100644 blob 74be14679ed891820cd9c3a7393007f8dd21d07d	SAK.txt
-100644 blob 561826f82093574bc61d887cae0436935d317c5e	SM501.txt
-100644 blob a660d494c8edcf9fc9bbaec9887ac6203bfcd60e	SecurityBugs
-100644 blob 2b7e32dfe00d95fadabc535372bea6ba343fdc59	SubmitChecklist
-100644 blob 31d372609ac00fb715a66174214d10f2ba673520	SubmittingDrivers
-100644 blob fd89b04d34f038bafd1485a8f96869828470f619	SubmittingPatches
-100644 blob 70acfbf399ebfb86f975ada4b8fbc2055b0ba673	VGA-softcursor.txt
-040000 tree bc7ec048cf540e56c5ba839ec9d85bd6eff3f2eb	accounting
-040000 tree 3f095916076e489cc63a253019e1a73693f3d3b9	acpi
-100644 blob cc2d4ac4f4042b7938e38f9f11970669292839a6	adding-syscalls.txt
-040000 tree 9850a7627679a34f8228c0abe8d06bcb4421f784	aoe
-100644 blob 77df55b0225ab331bb7268592fa5d18ed8f909c7	applying-patches.txt
-040000 tree 35fa24f995536c9d2bcf20c5f842bcc45ce83c86	arm
-040000 tree adf0f8637dc105841caeabe57ed9e631802d17fb	arm64
-100644 blob 2f2c6cdd73c0c24ab29dcd3f68034f99f17c3125	assoc_array.txt
-100644 blob b19fc34efdb17921af43bda0000b13dc82640451	atomic_ops.txt
-040000 tree 33c1cd21f36a02c691570dc7dcddf41d8331705d	auxdisplay
-040000 tree d6260d3558e94171cfa60b420c8df17a86cc7809	backlight
-100644 blob df84162132028d6771fc0da0649f54158bdac93c	bad_memory.txt
-100644 blob 8764e9f70821e4f894551f1fb1b98a881f3d3e9d	basic_profiling.txt
-100644 blob 32b6c3189d9826a53875ae6dc51ce62e9b86778b	bcache.txt
-100644 blob 6b1de70583715d7728a7a31b4612564b0178679b	binfmt_misc.txt
-040000 tree cd97febccb0fad00d0d61f0502f6e45c91ed06bf	blackfin
-040000 tree 8bbf8033be7139c9897791b4c6ec6611e83de346	block
-040000 tree dba91c80d3182baeb0a0ab56d13e49fd785ebec9	blockdev
-100644 blob d0d042c2fd5e9e319657117b3de567b2d42a995a	braille-console.txt
-100644 blob d8297e4ebd265eb5dd273bad20162e51d369b25a	bt8xxgpio.txt
-100644 blob 34916a46c0997dd58e1922a48e08038aab930e02	btmrvl.txt
-040000 tree 39641366356afa81c2a52aceeb914f2566c1f4ca	bus-devices
-100644 blob 2bc55ff3b4d1e2db24906a41ba71e7da8b900688	bus-virt-phys-mapping.txt
-100644 blob 3f9f808b51198b3f6278621b413c872f2b0a494f	cachetlb.txt
-040000 tree 8e44d0125c48edbffce58fa03aeaac213868d1ab	cdrom
-040000 tree 4d3a7398a2edaa5039706c89a4d7de65a3179282	cgroups
-100644 blob 88951b179262a912fcddf16872f302cf117ca4ba	circular-buffers.txt
-100644 blob 5c4bc4d01d0c32939af28b3c0044f1700231d4a1	clk.txt
-040000 tree 0f0536d144e4d4b9547db48a45a007dfe207e293	cma
-100644 blob 7f773d51fdd91acf10e49875abbe66fff0fae767	coccinelle.txt
-040000 tree a556d57f754fbaa46c7d0906ebec131e32eb6376	connector
-040000 tree 2db84b37022f7520c0c6bbfeec02c546ba553b46	console
-040000 tree 11e08c481fb1b35e5faecf7cb926f3d4efe78f87	cpu-freq
-100644 blob f9ad5e048b111297549df37cc6a6fc8bff1fc75a	cpu-hotplug.txt
-100644 blob 287224e57cfc5d2e75540e7c99cdd9e3f763ff7e	cpu-load.txt
-040000 tree 49738b4d2357cb08e9f1368e984815daab99dacd	cpuidle
-100644 blob 12b1b25b4da9711c95ab013adf1bec4214964d2c	cputopology.txt
-100644 blob a08a7dd9d6255867e88b1ccc51ef820eb635286c	crc32.txt
-040000 tree 7737f93e00f6311425f8d52af5ab63dd8bb26d64	cris
-040000 tree b2e8f35053e829bb602b71dc937a89c5f4b23c57	crypto
-100644 blob e1c52e2dc361607417693946573d8959c7e01b81	dcdbas.txt
-100644 blob 172ad4aec493cbe9a9db3b6193a43d8794b231e6	debugging-modules.txt
-100644 blob 03703afc4d302e7eeb7fb4031d494ab750233194	debugging-via-ohci1394.txt
-100644 blob d262e22bddec06945136bbec0e25826ef2df696e	dell_rbu.txt
-040000 tree bc28bfb6c3c0e63023b704090acb200fe2bdb1c1	development-process
-040000 tree adccded12cbd61b0f37fd603d09b99df8881cc7e	device-mapper
-100644 blob 87b4c5e82d39023094f9b5f9b10cf919e3740f9d	devices.txt
-040000 tree 64cd52d94d3e083b1c18cc633552b2550cf23e74	devicetree
-100644 blob 3f682889068bf932052737b57071ce715c851eda	digsig.txt
-100644 blob 480c8de3c2c44786174e112795f61b2381d3b09f	dma-buf-sharing.txt
-040000 tree a75e8c5eb06d2fc0b39427f20afd694f7e30e25a	dmaengine
-100644 blob 9de9813d0ec5df101a48428d40cfc9b9d2df6142	dontdiff
-040000 tree 213f8c902440f1b0d512b6d0f20252c028828556	driver-model
-040000 tree 0ebe2f7c24011ba6c1bae528431dc2c8f11889fc	dvb
-100644 blob 9417871b8758f26479e9c90e90a990988d657e8a	dynamic-debug-howto.txt
-040000 tree 020529dc9d406d453d30c463702d35e9ee2eef6d	early-userspace
-100644 blob 0cf27a3544a5744f39c232c75039a37ca079c2cd	edac.txt
-100644 blob 7747024d3bb70023fbff500cd3fc44546b31511b	efi-stub.txt
-100644 blob a55e4910924ea98b71969381b47ec16d922ecbdc	eisa.txt
-100644 blob 3fa450881ecb8e294a74d17766538804489fe9fd	email-clients.txt
-040000 tree 461c382186d40395ee88eba82b2ba8764285a35f	extcon
-040000 tree 475212bb9f2a96518b4da5f3fec8fe641e88c7e3	fault-injection
-040000 tree 4362119fa45f8ef6c411d2a269178f3bf1b7ed35	fb
-040000 tree 8abbff52bbacd5c4251af71bc2e30fd497b5feb0	features
-040000 tree 9e2856c144a66c8283dcd3f652edddac59e691bd	filesystems
-040000 tree aba7ab22ac20ede93689312a30310a5aa6793178	firmware_class
-100644 blob df904aec99044f8056ac530b9e9dc6de8f26f73e	flexible-arrays.txt
-040000 tree d4351d91b41949608f281d285520cc06b2b9d4fa	fmc
-040000 tree 2368701db45cbe838bc4721bde6ebcbab27b7737	frv
-100644 blob 77b36f59d16b452bbf12bba4e3db83ec3ea84a9f	futex-requeue-pi.txt
-100644 blob 7b727783db7ed4f87a7c68b44b52054c62f48e85	gcov.txt
-100644 blob 7050ce8794b9a4b3dd93b76dd9e2a6d708b468ee	gdb-kernel-debugging.txt
-040000 tree bcbdeb421fc8f6bfafa6a770cdbd6815eace6985	gpio
-040000 tree ceb5de1b9b291962ccbac05db7a66b6b84a2c802	hid
-100644 blob 6bad6f1d1cac4c16e513c491a5a6fb6df0c94786	highuid.txt
-100644 blob 6ac6cd51852af538efe38be0147fd585d14601a9	hsi.txt
-100644 blob 026e237bbc875ac0401cffaf33376e784da9a0b2	hw_random.txt
-040000 tree 0fd3a6b83e05058c3e8396a6f5e0d6d8e740492a	hwmon
-100644 blob 61c1ee98e59f2137b8b250d2b469d4d949cca9b3	hwspinlock.txt
-040000 tree eac8d0f964d8511d9cf9d1dcced3f3b54ce65c54	i2c
-040000 tree dbc729c5c0ad5e8c3b0921948a31695e2667dbdb	ia64
-040000 tree 75c7964c0da70c8fb033064f7503e037a181cde1	ide
-040000 tree 11cf0e775bfe35ea324fac18f8b6e7882edc1e35	infiniband
-100644 blob 535ad5e82b98cb5ed2adad76afc03be347b3af36	init.txt
-100644 blob 4e1839ccb555e32c7fc3915dd4a76a0f3664b26f	initrd.txt
-040000 tree 7d27d4c0f1e283e3435b24f7a3c9d1a4dc1a8bbc	input
-100644 blob 91d89c540709876eadba970228d317faa2dd2153	intel_txt.txt
-100644 blob 5ca78426f54c58d10e3fd0030ad51f6ccb2b5b9b	io-mapping.txt
-100644 blob 9faae6f26d3227d1799eae90e51471f00b82398d	io_ordering.txt
-040000 tree 75305cae2df1b51232f7e663a9d44f8d0a615fbf	ioctl
-100644 blob 65f694f2d1c9461c39f2ee71de4f24c7ddc62b02	iostats.txt
-100644 blob f6da05670e16d9dcfc3f8b7d50a1a4291ad8a974	irqflags-tracing.txt
-100644 blob 400d1b5b523dd8b80d3b5dfbeaf7962611ffd06a	isapnp.txt
-040000 tree 6d8fbb1e1d7bf73bd985dbc098ba953ce06db085	isdn
-040000 tree 3bcb74b2add6f724ab7f76133dc4471770e03c4d	ja_JP
-100644 blob 418020584ccc171b8ff079e496e73383f0f55c29	java.txt
-100644 blob 0d32355a4c348ce18cf4540e61a129b4cf2ac3fb	kasan.txt
-040000 tree 3e92f27cedbc6a0b52e06e4ba11e57e76826f402	kbuild
-040000 tree b508edd7ad1443bff47fc4ac1f843c84abbaaeb1	kdump
-100644 blob 78f69cdc9b3fbcec6f32beb179eb4c8732883d5a	kernel-doc-nano-HOWTO.txt
-100644 blob eda1eb1451a0881097bfaa8ad76c18acd6945f36	kernel-docs.txt
-100644 blob 22a4b687ea5b4b3cb9d576bfeffaed813256a795	kernel-parameters.txt
-100644 blob f4cbfe0ba1085b4df3067dcc457219699c5c6150	kernel-per-CPU-kthreads.txt
-100644 blob 80aae85d8da6c1b8476fd6824553ae7070e5c508	kmemcheck.txt
-100644 blob 18e24abb3ecf61b1f6a214af921af8bd138b27e4	kmemleak.txt
-040000 tree b51cd2dcf225f1004e4d23fd80db32f0de7f8ef3	ko_KR
-100644 blob 1be59a3a521c87fd6107fcdf64f7c7ac525d1512	kobject.txt
-100644 blob 1f9b3e2b98aec9a6687ae14b4f85d7c143729c07	kprobes.txt
-100644 blob ddf85a5dde0c12a435b9cbcc30f44159de5acc0b	kref.txt
-100644 blob a87d840bacfe11df785995eaee5698f23d565f94	kselftest.txt
-040000 tree 652f991d106263d2c68500cf5ad896612945c2b9	laptops
-100644 blob 4f80edd14d0a688d2a4cf1cdc491102601a53b9a	ldm.txt
-040000 tree 4839303afa967a2104cdaf8aeff6030f27e2b932	leds
-100644 blob 407576a233177c3c336827b952872c082207d9e4	local_ops.txt
-040000 tree 307372f9d9d08902e22d22034081806aa2fdd6b3	locking
-100644 blob 22dd6af2e4bd42152edbe872b224b85a769e7184	lockup-watchdogs.txt
-100644 blob 2eae75fecfb965f49065c680063a40c594736ee5	logo.gif
-100644 blob 296f0f7f67eb2d73be7ec80106feaf77c5aac163	logo.txt
-100644 blob ea45dd3901e3bfa2363bbe7a7009e0fc19809bfd	lzo.txt
-040000 tree c40b2eebc8f4266f6374c41dfa30d29d86bb57ea	m68k
-100644 blob 28befed9f6102a094702337a229b78c16a94bcde	magic-number.txt
-100644 blob 7ed371c852046b3dd5d993db1815d00a9d8f4bc0	mailbox.txt
-100644 blob 1b794369e03a4ef14099f4ce702fc0d7c65140c6	md-cluster.txt
-100644 blob 1a2ada46aaedae5162499886ec7c532d80c84b82	md.txt
-100644 blob f552a75c0e70b22b3800a3fa93c0783075228250	media-framework.txt
-100644 blob 2ba8461b0631de759fefd2a12918a6c4f4ee7562	memory-barriers.txt
-040000 tree d2fdb444074b09b83d1f74b2a190325606e3f31c	memory-devices
-100644 blob ce2cfcf35c27a0d0972547e82f61fbc38c85b5ab	memory-hotplug.txt
-100644 blob 30ded732027e2814ccc8c4cf5690a84fbc8ebc30	men-chameleon-bus.txt
-040000 tree f0b23005636d2d2e4a4b9f78567895a087610195	metag
-040000 tree 29c6681a225b17dbb0cd20b9d73e6d30bb846927	mic
-040000 tree 27c1a445222aeb50056defd34a41ea5ba41b7306	mips
-040000 tree 11295031a1fb2167d7816e2b4c53272f92489873	misc-devices
-040000 tree e45fccc68091d5b9c675558a8667af34923ec594	mmc
-040000 tree 1a438a86d22deddb5bf600b21242d0d3c79f0b04	mn10300
-100644 blob a78bf1ffa68cb4c4defe32146fc75f8449a46245	module-signing.txt
-100644 blob d01ac60521943756a99bfc07fe8fe05e6775626f	mono.txt
-040000 tree 3949e1a47604a29499fb37ee66a599004436a00b	mtd
-040000 tree d674dc07291045530f4b83ce02ec866765990853	namespaces
-040000 tree dbc8596c5816529d45d5339601d1ec9ceab2193b	netlabel
-040000 tree 0303625762b34a4fc5ac065d9aa84c489e8141a3	networking
-040000 tree 1f4b88a93381592d6b026ad6ed895cc42c551720	nfc
-040000 tree 983c152dbf360507b31e2326bb2a35c66eeddf20	nios2
-100644 blob ae57b9ea0d4169258b48b0531976b1a4a30eabae	nommu-mmap.txt
-100644 blob 1d9bbabb6c79abb04259b78481f7304abacbaccc	ntb.txt
-100644 blob 520327790d5431daae3a537d0fd36ec897cde5a8	numastat.txt
-040000 tree e11c61ab7124dd21cf150ab4c31bfd1e8fedab88	nvdimm
-040000 tree 2d0554d83b8cf9d2d361cc30e9794819658e3f1a	nvmem
-100644 blob f3ac05cc23e4abb0ea13277fc8a45873351e7ce3	oops-tracing.txt
-100644 blob 7ddfe216a0aa787a52421de6dc8ebc0f3b9002b2	padata.txt
-040000 tree 6814a2e66f30688c33b20c88907eaf4e2e0f8059	parisc
-100644 blob 120eb20dbb09199afc1628a2ca1187812789bde9	parport-lowlevel.txt
-100644 blob c208e4366c033d5bc5d1c40b6d055b7c722656d4	parport.txt
-040000 tree 8e50ccd74aeee952f963e0d70cea243bd078f22a	pcmcia
-100644 blob 7d3c82431909dd8120322e2360ce32cbd93f87e5	percpu-rw-semaphore.txt
-100644 blob b388c5af9e726fe8fdd2eaec09eb1b9374f16b87	phy.txt
-040000 tree ea4f357d526fbce14e0c2879c95a8bbafd7b3d5e	phy
-100644 blob 9a5bc8651c2923c619b168c1719f1e25e381e368	pi-futex.txt
-100644 blob 4976389e432d4dd5207d65ad7c37d407c00d9d87	pinctrl.txt
-040000 tree 90cc82c9b546a1c94b1545800b84303562744d1f	platform
-100644 blob 763e4659bf186fceff80ae17f50e7b495fe3e7b6	pnp.txt
-040000 tree 0487c8fa4b60c90fd12de8c9ef7574d749f9ac4b	power
-040000 tree 1d2f3280d25fca0e5a0f703e82177298911df260	powerpc
-040000 tree 591eb3d2ce87db9b11b8e84270dfa59ef49854ee	pps
-040000 tree 98f3e67e4e4688c5a4e439caed2c6db2ae811d1a	prctl
-100644 blob e89ce6624af2fab481a708ad1a0e4e20d1bc0c1c	preempt-locking.txt
-100644 blob 2216eb187c213b4c0c5140a760f9df3098150e41	printk-formats.txt
-040000 tree da1837f687e5d470a7907a0ece81c877987fd282	pti
-040000 tree 962176c51cfe9f3846ab59aafdcc0f07db4e765a	ptp
-100644 blob ca895fd211e4e9f5f6bd0fc6a13bf60d9a0c14b2	pwm.txt
-100644 blob 5d8675615e59c40c6564710a0a9b73ae060e2a00	ramoops.txt
-040000 tree d51ed0cdcddfd9bd8bccbe8169ee47b61fcdc756	rapidio
-100644 blob 39873ef41bf9fc1a72b8a2e9ace8284babe74abe	rbtree.txt
-100644 blob ef0219fa4bb4cf5beb9078293a92b3ccbcbe0d48	remoteproc.txt
-100644 blob 2ee6ef9a6554d600088ae572b3256ffe44e51d08	rfkill.txt
-100644 blob 16eb314f56cc45ce923d9354960bdf67ea4e6b98	robust-futex-ABI.txt
-100644 blob af6fce23e4847709d32ddee025cafb055326f171	robust-futexes.txt
-100644 blob f7edc3aa1e92d4e2eac9ed143212f9757577f041	rpmsg.txt
-100644 blob 8446f1ea1410b87b071047dc310a787a92606c31	rtc.txt
-040000 tree c7b9d98141594d46c92b026a63f854017c8039e5	s390
-040000 tree 5d3736128a6ad1ba76f945c4389034f7aa0b5681	scheduler
-040000 tree 1d347ab5c9dce9eb05bf5be505afb6529183f5af	scsi
-040000 tree e8e43eadba479833220bf3fa3d1fbaefe9a17991	security
-100644 blob 9a7bc8b3f479b2b82dbfa1056df060366dbafdec	serial-console.txt
-040000 tree 39133be11e4495c042f2439e984984bec4e63cb6	serial
-100644 blob 876c96ae38dba1402e79c11a10ff1c64eb5741fd	sgi-ioc4.txt
-040000 tree e6a02a1b02f80ba24307f22431ccceb6fb308838	sh
-100644 blob 6b492e82b43d98b93020e033ea1b108adbbf6033	smsc_ece1099.txt
-040000 tree 887a845d843820c990ab3cc6251d56a864b9fa34	sound
-100644 blob eceab1308a8c2fbde6722232db18bbb57a6e7f2e	sparse.txt
-040000 tree 78f79272aa73a95571b1c2d4ea4702b1eaeecb46	spi
-100644 blob db3be892afb2b64ee582a5e43ce87223a1251ad3	stable_api_nonsense.txt
-100644 blob 3049a612291b1ad8651da72c6081539bb4e83a74	stable_kernel_rules.txt
-100644 blob 477927becacba69ee4bdea2203dd796979d14449	static-keys.txt
-100644 blob cd66ec836e4f45aae80754ece6c384cfd2f45b95	svga.txt
-040000 tree a9a8db7e58ce0082f02604d6f86ab4dd5f32ff9f	sysctl
-100644 blob ce60ffa94d2d709681ed339fc4ef25369a2c377d	sysfs-rules.txt
-100644 blob 13f5619b2203e68af6d766f66a8137dd1133d4fa	sysrq.txt
-040000 tree 9f25dc697646d3ee9505b920a07e4caaf976345d	target
-040000 tree 9d4f3319f51b26a7697e109e9d1ba7f435603a5d	thermal
-100644 blob 2cbf71975381d0a850d1a254aa76af7957b35058	this_cpu_ops.txt
-040000 tree 3e4b4130aa6d96892130c0e74d8efedd6874f4e7	timers
-040000 tree d1b46a427ea95f8e3e49dac8b035c3970d794e15	tpm
-040000 tree db021902c4a4d411ee1b168b4670e490fa7c1b36	trace
-100644 blob a445da098bc6e5aa733cd55ca2ee8b4a5f04dc2c	unaligned-memory-access.txt
-100644 blob 4a33f81cadb10165fad3ca7014f83b54f492a4bb	unicode.txt
-100644 blob a8643513a5f6cb25851140c021aec4a671c8b62c	unshare.txt
-040000 tree bc63f554449a02f3f2d80817327846e127b2c0f1	usb
-040000 tree 04a86dfd52c143ed1352758c8e93871cf3c67a2c	vDSO
-100644 blob 1dd3fddfd3a1e536de39b69c37168dfc35553a4a	vfio.txt
-100644 blob 014423e2824c23fa5b08552e292db52fa25013a7	vgaarbiter.txt
-100644 blob e517011be4f964db7b452e1e50420eaed83f143d	video-output.txt
-040000 tree 0613d846d1dffae70dabcc998a5fdacd7f5b7a4e	video4linux
-040000 tree bfa10f433ac83ca402ed876f705cb0f4a9e31c75	virtual
-040000 tree abe2d8a8bbd0f97a2c5485d6adb62c14113bc3d6	vm
-100644 blob ca5b82797f6c5c79c949a38cd7d7c19270035993	vme_api.txt
-100644 blob db0cb228d64aa4a80a4fe380be3e46439de810e6	volatile-considered-harmful.txt
-040000 tree 06051b06aeeee33b30966fbf0b53b241c6261454	w1
-040000 tree e796cb3b81fab2327d367e17ba75bac24540c59e	watchdog
-040000 tree b48b24715e6929469eb3e7a96eecf7f00e14a607	wimax
-100644 blob 5e0e05c5183e290e8d78c531a3f42bc3c85377f7	workqueue.txt
-040000 tree 1390d65651d4d0aab960bf20b55d5562c727a81e	x86
-100644 blob 81d111b4dc28e15d3ab7471f8be1b8f42fe63e4c	xillybus.txt
-040000 tree afee3267cb7f59a0e0236309e27e14985618d523	xtensa
-100644 blob 2cf3e2608de324b5622673943807b8e8b353e2da	xz.txt
-040000 tree d9c00fe0c456581fc233ad805191be86b387b605	zh_CN
-100644 blob 90a64d52bea2f33464f86e4dc93954b2bc105f50	zorro.txt
-            """,  # NOQA
-            "e202fc2cf10dcc460aaf469db4cb5379bbe326d8":
-            """
-100644 blob 5b6e7c66c276e7610d4a73c70ec1a1f7c1003259    COPYING
-100644 blob 13248728a1c884756a0e265faf5b679ec27f47bc    Copyright
-100644 blob d8b02abb7e1a3523a40f8b7cbfb7d05f6fca8557    Makefile.pre
-100644 blob 886eacfa48acef07d6d0b5b3b197811ab7775340    README
-100755 blob 2a5781c640c10f05d7f194e0f1d24aaa96833e46    configure
-040000 tree 656a2f680866edaf80fdfbcc7db503fe06b6772d    doc
-100644 blob b4d29e3dd5710423b57f388dfec3acd3d04b76f7    es.cwl
-100644 blob b883cd6b699486be32abaeeb15eacdfb4d816893    es.dat
-100644 blob 4103348bbbbc69ea08f2c970c3e360794137ed8c    es.multi
-100644 blob c3afb3608574b7afa5364468b5267c0824c8f079    espa\udcf1ol.alias
-100644 blob c3afb3608574b7afa5364468b5267c0824c8f079    esponol.alias
-100644 blob 7926a11dac0dc13055ed8a4ada14b7985a3332f5    info
-100644 blob c3afb3608574b7afa5364468b5267c0824c8f079    spanish.alias
-"""
-    }  # NOQA
-
-    @istest
-    def compute_complex_directories_git_sha1(self):
-        for sha1 in self.to_checks.keys():
-            sha1_input = self.to_checks[sha1]
-            self.assertEquals(sha1, compute_tree_hash('some-path', sha1_input,
-                                                      sha1))
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
index c9f47e1e6b34289a3196b05c7d2a5c973bbc6fcc..1a4f24a606fc0cda16b4fc3df92ba49799b3145e 100644
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -4,6 +4,7 @@
 # See top-level LICENSE file for more information
 
 import io
+import os
 import tempfile
 import unittest
 
@@ -92,8 +93,11 @@ class Hashutil(unittest.TestCase):
     def hash_path(self):
         with tempfile.NamedTemporaryFile(delete=False) as f:
             f.write(self.data)
-            f.close()
-            hashes = hashutil.hash_path(f.name)
+
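+        # The file is hashed only after the with block has closed it; it was
+        # created with delete=False, so remove it explicitly afterwards.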
+        hashes = hashutil.hash_path(f.name)
+        os.remove(f.name)
 
         self.checksums['length'] = len(self.data)
         self.assertEquals(self.checksums, hashes)
diff --git a/swh/model/tests/test_merkle.py b/swh/model/tests/test_merkle.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f438928decb2018645e71b66a898980173e024f
--- /dev/null
+++ b/swh/model/tests/test_merkle.py
@@ -0,0 +1,239 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import unittest
+
+from swh.model import merkle
+
+
+class TestedMerkleNode(merkle.MerkleNode):
+    type = 'tested_merkle_node_type'
+
+    def __init__(self, data):
+        super().__init__(data)
+        self.compute_hash_called = 0
+
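+    # Readable stand-in for a real hash: the node's own value plus each
+    # sorted child's key and hash; compute_hash_called tracks recomputations.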
+    def compute_hash(self):
+        self.compute_hash_called += 1
+        child_data = [
+            child + b'=' + self[child].hash
+            for child in sorted(self)
+        ]
+
+        return (
+            b'hash('
+            + b', '.join([self.data['value']] + child_data)
+            + b')'
+        )
+
+
+class TestedMerkleLeaf(merkle.MerkleLeaf):
+    type = 'tested_merkle_leaf_type'
+
+    def __init__(self, data):
+        super().__init__(data)
+        self.compute_hash_called = 0
+
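+    # A leaf has no children, so its hash covers its own data only.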
+    def compute_hash(self):
+        self.compute_hash_called += 1
+        return b'hash(' + self.data['value'] + b')'
+
+
+class TestMerkleLeaf(unittest.TestCase):
+    def setUp(self):
+        self.data = {'value': b'value'}
+        self.instance = TestedMerkleLeaf(self.data)
+
+    def test_hash(self):
+        self.assertEqual(self.instance.compute_hash_called, 0)
+        instance_hash = self.instance.hash
+        self.assertEqual(self.instance.compute_hash_called, 1)
+        instance_hash2 = self.instance.hash
+        self.assertEqual(self.instance.compute_hash_called, 1)
+        self.assertEqual(instance_hash, instance_hash2)
+
+    def test_data(self):
+        self.assertEqual(self.instance.get_data(), self.data)
+
+    def test_collect(self):
+        collected = self.instance.collect()
+        self.assertEqual(
+            collected, {
+                self.instance.type: {
+                    self.instance.hash: self.instance.get_data(),
+                },
+            },
+        )
+        collected2 = self.instance.collect()
+        self.assertEqual(collected2, {})
+        self.instance.reset_collect()
+        collected3 = self.instance.collect()
+        self.assertEqual(collected, collected3)
+
+    def test_leaf(self):
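+        # A leaf holds no children: any child access or mutation must raise.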
+        with self.assertRaisesRegex(ValueError, 'is a leaf'):
+            self.instance[b'key1'] = 'Test'
+
+        with self.assertRaisesRegex(ValueError, 'is a leaf'):
+            del self.instance[b'key1']
+
+        with self.assertRaisesRegex(ValueError, 'is a leaf'):
+            self.instance[b'key1']
+
+        with self.assertRaisesRegex(ValueError, 'is a leaf'):
+            self.instance.update(self.data)
+
+
+class TestMerkleNode(unittest.TestCase):
+    maxDiff = None
+
+    def setUp(self):
+        self.root = TestedMerkleNode({'value': b'root'})
+        self.nodes = {b'root': self.root}
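+        # Build a full ternary tree, three levels deep below the root, and
+        # index every node by its b'root/...' path in self.nodes.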
+        for i in (b'a', b'b', b'c'):
+            value = b'root/' + i
+            node = TestedMerkleNode({
+                'value': value,
+            })
+            self.root[i] = node
+            self.nodes[value] = node
+            for j in (b'a', b'b', b'c'):
+                value2 = value + b'/' + j
+                node2 = TestedMerkleNode({
+                    'value': value2,
+                })
+                node[j] = node2
+                self.nodes[value2] = node2
+                for k in (b'a', b'b', b'c'):
+                    value3 = value2 + b'/' + k
+                    node3 = TestedMerkleNode({
+                        'value': value3,
+                    })
+                    node2[k] = node3
+                    self.nodes[value3] = node3
+
+    def test_hash(self):
+        for node in self.nodes.values():
+            self.assertEqual(node.compute_hash_called, 0)
+
+        # Root hash will compute hash for all the nodes
+        hash = self.root.hash
+        for node in self.nodes.values():
+            self.assertEqual(node.compute_hash_called, 1)
+            self.assertIn(node.data['value'], hash)
+
+        # Should use the cached value
+        hash2 = self.root.hash
+        self.assertEqual(hash, hash2)
+        for node in self.nodes.values():
+            self.assertEqual(node.compute_hash_called, 1)
+
+        # Should still use the cached value
+        hash3 = self.root.update_hash(force=False)
+        self.assertEqual(hash, hash3)
+        for node in self.nodes.values():
+            self.assertEqual(node.compute_hash_called, 1)
+
+        # Force update of the cached value for a deeply nested node
+        self.root[b'a'][b'b'].update_hash(force=True)
+        for key, node in self.nodes.items():
+            # update_hash rehashes all children
+            if key.startswith(b'root/a/b'):
+                self.assertEqual(node.compute_hash_called, 2)
+            else:
+                self.assertEqual(node.compute_hash_called, 1)
+
+        hash4 = self.root.hash
+        self.assertEqual(hash, hash4)
+        for key, node in self.nodes.items():
+            # update_hash also invalidates all parents
+            if key in (b'root', b'root/a') or key.startswith(b'root/a/b'):
+                self.assertEqual(node.compute_hash_called, 2)
+            else:
+                self.assertEqual(node.compute_hash_called, 1)
+
+    def test_collect(self):
+        collected = self.root.collect()
+        self.assertEqual(len(collected[self.root.type]), len(self.nodes))
+        for node in self.nodes.values():
+            self.assertTrue(node.collected)
+        collected2 = self.root.collect()
+        self.assertEqual(collected2, {})
+
+    def test_get(self):
+        for key in (b'a', b'b', b'c'):
+            self.assertEqual(self.root[key], self.nodes[b'root/' + key])
+
+        with self.assertRaisesRegex(KeyError, "b'nonexistent'"):
+            self.root[b'nonexistent']
+
+    def test_del(self):
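+        # Deleting a grandchild must invalidate the cached hashes of its
+        # ancestors and leave the removed node with no parents.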
+        hash_root = self.root.hash
+        hash_a = self.nodes[b'root/a'].hash
+        del self.root[b'a'][b'c']
+        hash_root2 = self.root.hash
+        hash_a2 = self.nodes[b'root/a'].hash
+
+        self.assertNotEqual(hash_root, hash_root2)
+        self.assertNotEqual(hash_a, hash_a2)
+
+        self.assertEqual(self.nodes[b'root/a/c'].parents, [])
+
+        with self.assertRaisesRegex(KeyError, "b'nonexistent'"):
+            del self.root[b'nonexistent']
+
+    def test_update(self):
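+        # update() must reparent the incoming children, orphan the replaced
+        # node, and invalidate the hashes of root/b and its ancestors.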
+        hash_root = self.root.hash
+        hash_b = self.root[b'b'].hash
+        new_children = {
+            b'c': TestedMerkleNode({'value': b'root/b/new_c'}),
+            b'd': TestedMerkleNode({'value': b'root/b/d'}),
+        }
+
+        # collect all nodes
+        self.root.collect()
+
+        self.root[b'b'].update(new_children)
+
+        # Ensure everyone got reparented
+        self.assertEqual(new_children[b'c'].parents, [self.root[b'b']])
+        self.assertEqual(new_children[b'd'].parents, [self.root[b'b']])
+        self.assertEqual(self.nodes[b'root/b/c'].parents, [])
+
+        hash_root2 = self.root.hash
+        self.assertNotEqual(hash_root, hash_root2)
+        self.assertIn(b'root/b/new_c', hash_root2)
+        self.assertIn(b'root/b/d', hash_root2)
+
+        hash_b2 = self.root[b'b'].hash
+        self.assertNotEqual(hash_b, hash_b2)
+
+        for key, node in self.nodes.items():
+            if key in (b'root', b'root/b'):
+                self.assertEqual(node.compute_hash_called, 2)
+            else:
+                self.assertEqual(node.compute_hash_called, 1)
+
+        # Ensure we collected root, root/b, and both new children
+        collected_after_update = self.root.collect()
+        self.assertCountEqual(
+            collected_after_update[TestedMerkleNode.type],
+            [self.nodes[b'root'].hash, self.nodes[b'root/b'].hash,
+             new_children[b'c'].hash, new_children[b'd'].hash],
+        )
+
+        # test that a noop update doesn't invalidate anything
+        self.root[b'a'][b'b'].update({})
+        self.assertEqual(self.root.collect(), {})
diff --git a/version.txt b/version.txt
index 155f9176266716cbc564ef72622a80ae282fb380..99574ed9d87d56511e39bcd98638ae7f79fae840 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.17-0-g11de644
\ No newline at end of file
+v0.0.18-0-g34228c5
\ No newline at end of file