diff --git a/PKG-INFO b/PKG-INFO
index f0a6d7d6146ff5af350c267bb615f22127d353bf..8171a982ef1699f1e87a9a39da03c74196ad94b8 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: swh.model
-Version: 0.0.56
+Version: 0.0.57
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
diff --git a/requirements-cli.txt b/requirements-cli.txt
index f58c0c8bf39fc7ae9208e241cecc451265b321e2..7365d1f9c6169a1f81b7a2272b8fe543e524297a 100644
--- a/requirements-cli.txt
+++ b/requirements-cli.txt
@@ -1,2 +1,3 @@
+swh.core
 Click
 dulwich
diff --git a/requirements.txt b/requirements.txt
index 98825fa3ef8c1821335b73ab07e581747024d346..1577daa98bd6212c48392847cb70ab54c9cb523b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ vcversioner
 attrs
 hypothesis
 python-dateutil
+iso8601
diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO
index f0a6d7d6146ff5af350c267bb615f22127d353bf..8171a982ef1699f1e87a9a39da03c74196ad94b8 100644
--- a/swh.model.egg-info/PKG-INFO
+++ b/swh.model.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: swh.model
-Version: 0.0.56
+Version: 0.0.57
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
diff --git a/swh.model.egg-info/requires.txt b/swh.model.egg-info/requires.txt
index 718cd8a1eee84fa9efce95ccfd41e2aff20ec815..7bf73112692c2e56620cf1d65df95a5ca1354e02 100644
--- a/swh.model.egg-info/requires.txt
+++ b/swh.model.egg-info/requires.txt
@@ -2,11 +2,13 @@ vcversioner
 attrs
 hypothesis
 python-dateutil
+iso8601
 
 [:python_version < "3.6"]
 pyblake2
 
 [cli]
+swh.core
 Click
 dulwich
 
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index 64a6ef7a824f8da162a7152708caddfaad793861..583df11cf3cccb4a4dfe4fc4356d67034bb8d637 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -7,15 +7,36 @@ import enum
 import os
 import stat
 
-from typing import List
+import attr
+from typing import List, Optional
 
-from .hashutil import MultiHash, HASH_BLOCK_SIZE
+from .hashutil import MultiHash
 from .merkle import MerkleLeaf, MerkleNode
 from .identifiers import (
-    directory_identifier,
+    directory_entry_sort_key, directory_identifier,
     identifier_to_bytes as id_to_bytes,
     identifier_to_str as id_to_str,
 )
+from . import model
+
+
+@attr.s
+class DiskBackedContent(model.Content):
+    """Subclass of Content, which allows lazy-loading data from the disk."""
+    path = attr.ib(type=Optional[bytes], default=None)
+
+    def __attrs_post_init__(self):
+        if self.path is None:
+            raise TypeError('path must not be None.')
+
+    def with_data(self) -> model.Content:
+        args = self.to_dict()
+        del args['path']
+        assert self.path is not None
+        with open(self.path, 'rb') as fd:
+            return model.Content.from_dict({
+                **args,
+                'data': fd.read()})
 
 
 class DentryPerms(enum.IntEnum):
@@ -83,6 +104,7 @@ class Content(MerkleLeaf):
         ret['length'] = len(data)
         ret['perms'] = mode_to_perms(mode)
         ret['data'] = data
+        ret['status'] = 'visible'
 
         return cls(ret)
 
@@ -92,7 +114,8 @@ class Content(MerkleLeaf):
         return cls.from_bytes(mode=mode, data=os.readlink(path))
 
     @classmethod
-    def from_file(cls, *, path, data=False, save_path=False):
+    def from_file(
+            cls, *, path, max_content_length=None):
         """Compute the Software Heritage content entry corresponding to an
         on-disk file.
 
@@ -101,42 +124,53 @@ class Content(MerkleLeaf):
         - using the content as a directory entry in a directory
 
         Args:
-          path (bytes): path to the file for which we're computing the
-            content entry
-          data (bool): add the file data to the entry
-          save_path (bool): add the file path to the entry
+          path (bytes): path to the file to compute the content entry for
+          max_content_length (Optional[int]): if given, all contents larger
+            than this will be skipped.
 
         """
         file_stat = os.lstat(path)
         mode = file_stat.st_mode
+        length = file_stat.st_size
+        too_large = max_content_length is not None \
+            and length > max_content_length
 
         if stat.S_ISLNK(mode):
             # Symbolic link: return a file whose contents are the link target
+
+            if too_large:
+                # Unlike large contents, we can't stream symlinks to
+                # MultiHash, and we don't want to fit them in memory if
+                # they exceed max_content_length either.
+                # Thankfully, this should not happen for reasonable values of
+                # max_content_length because of OS/filesystem limitations,
+                # so let's just raise an error.
+                raise Exception(f'Symlink too large ({length} bytes)')
+
             return cls.from_symlink(path=path, mode=mode)
         elif not stat.S_ISREG(mode):
             # not a regular file: return the empty file instead
             return cls.from_bytes(mode=mode, data=b'')
 
-        length = file_stat.st_size
-
-        if not data:
-            ret = MultiHash.from_path(path).digest()
+        if too_large:
+            skip_reason = 'Content too large'
+        else:
+            skip_reason = None
+
+        hashes = MultiHash.from_path(path).digest()
+        if skip_reason:
+            ret = {
+                **hashes,
+                'status': 'absent',
+                'reason': skip_reason,
+            }
         else:
-            h = MultiHash(length=length)
-            chunks = []
-            with open(path, 'rb') as fobj:
-                while True:
-                    chunk = fobj.read(HASH_BLOCK_SIZE)
-                    if not chunk:
-                        break
-                    h.update(chunk)
-                    chunks.append(chunk)
-
-            ret = h.digest()
-            ret['data'] = b''.join(chunks)
-
-        if save_path:
-            ret['path'] = path
+            ret = {
+                **hashes,
+                'status': 'visible',
+            }
+
+        ret['path'] = path
         ret['perms'] = mode_to_perms(mode)
         ret['length'] = length
 
@@ -149,6 +183,18 @@ class Content(MerkleLeaf):
     def compute_hash(self):
         return self.data['sha1_git']
 
+    def to_model(self) -> model.BaseContent:
+        """Builds a `model.BaseContent` object based on this leaf."""
+        data = self.get_data().copy()
+        data.pop('perms', None)
+        if data['status'] == 'absent':
+            data.pop('path', None)
+            return model.SkippedContent.from_dict(data)
+        elif 'data' in data:
+            return model.Content.from_dict(data)
+        else:
+            return DiskBackedContent.from_dict(data)
+
 
 def accept_all_directories(dirname, entries):
     """Default filter for :func:`Directory.from_disk` accepting all
@@ -220,8 +266,9 @@ class Directory(MerkleNode):
     type = 'directory'
 
     @classmethod
-    def from_disk(cls, *, path, data=False, save_path=False,
-                  dir_filter=accept_all_directories):
+    def from_disk(cls, *, path,
+                  dir_filter=accept_all_directories,
+                  max_content_length=None):
         """Compute the Software Heritage objects for a given directory tree
 
         Args:
@@ -232,6 +279,8 @@ class Directory(MerkleNode):
             name or contents. Takes two arguments: dirname and entries, and
             returns True if the directory should be added, False if the
             directory should be ignored.
+          max_content_length (Optional[int]): if given, all contents larger
+            than this will be skipped.
         """
 
         top_path = path
@@ -244,8 +293,8 @@ class Directory(MerkleNode):
             for name in fentries + dentries:
                 path = os.path.join(root, name)
                 if not os.path.isdir(path) or os.path.islink(path):
-                    content = Content.from_file(path=path, data=data,
-                                                save_path=save_path)
+                    content = Content.from_file(
+                        path=path, max_content_length=max_content_length)
                     entries[name] = content
                 else:
                     if dir_filter(name, dirs[path].entries):
@@ -291,17 +340,24 @@ class Directory(MerkleNode):
 
     @property
     def entries(self):
+        """Child nodes, sorted by name in the same way `directory_identifier`
+        does."""
         if self.__entries is None:
-            self.__entries = [
+            self.__entries = sorted((
                 self.child_to_directory_entry(name, child)
                 for name, child in self.items()
-            ]
+            ), key=directory_entry_sort_key)
 
         return self.__entries
 
     def compute_hash(self):
         return id_to_bytes(directory_identifier({'entries': self.entries}))
 
+    def to_model(self) -> model.Directory:
+        """Builds a `model.Directory` object based on this node;
+        ignoring its children."""
+        return model.Directory.from_dict(self.get_data())
+
     def __getitem__(self, key):
         if not isinstance(key, bytes):
             raise ValueError('Can only get a bytes from Directory')
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index 9257de142f168d62f7bbf868c4ed6811d6e57896..85fc76c4585ccba88b8f7e69eca8ae2f9ff34ea1 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -114,7 +114,7 @@ def content_identifier(content):
     return MultiHash.from_data(content['data']).digest()
 
 
-def _sort_key(entry):
+def directory_entry_sort_key(entry):
     """The sorting key for tree entries"""
     if entry['type'] == 'dir':
         return entry['name'] + b'/'
@@ -182,7 +182,7 @@ def directory_identifier(directory):
 
     components = []
 
-    for entry in sorted(directory['entries'], key=_sort_key):
+    for entry in sorted(directory['entries'], key=directory_entry_sort_key):
         components.extend([
             _perms_to_bytes(entry['perms']),
             b'\x20',
diff --git a/swh/model/merkle.py b/swh/model/merkle.py
index 02c6f2b29d17e5f6d9dc5336fe760bfc68d1617e..9d97efdc55b1c0bf23c5abfdea8f995988197ea9 100644
--- a/swh/model/merkle.py
+++ b/swh/model/merkle.py
@@ -8,7 +8,7 @@
 import abc
 import collections
 
-from typing import List, Optional
+from typing import Iterator, List, Optional, Set
 
 
 def deep_update(left, right):
@@ -120,6 +120,13 @@ class MerkleNode(dict, metaclass=abc.ABCMeta):
         self.__hash = None
         self.collected = False
 
+    def __eq__(self, other):
+        return isinstance(other, MerkleNode) \
+            and super().__eq__(other) and self.data == other.data
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
     def invalidate_hash(self):
         """Invalidate the cached hash of the current node."""
         if not self.__hash:
@@ -266,6 +273,20 @@ class MerkleNode(dict, metaclass=abc.ABCMeta):
         for child in self.values():
             child.reset_collect()
 
+    def iter_tree(self) -> Iterator['MerkleNode']:
+        """Yields all children nodes, recursively. Common nodes are
+        deduplicated.
+        """
+        yield from self._iter_tree(set())
+
+    def _iter_tree(
+            self, seen: Set[bytes]) -> Iterator['MerkleNode']:
+        if self.hash not in seen:
+            seen.add(self.hash)
+            yield self
+            for child in self.values():
+                yield from child._iter_tree(seen=seen)
+
 
 class MerkleLeaf(MerkleNode):
     """A leaf to a Merkle tree.
diff --git a/swh/model/model.py b/swh/model/model.py
index 512824d60a1ab05254dbb7649d6f0ba2ffc54bd6..aff5a7d64a4d840710e66b0fe323395fab0b5c8d 100644
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -7,10 +7,11 @@ import datetime
 
 from abc import ABCMeta, abstractmethod
 from enum import Enum
-from typing import List, Optional, Dict
+from typing import List, Optional, Dict, Union
 
 import attr
 import dateutil.parser
+import iso8601
 
 from .identifiers import (
     normalize_timestamp, directory_identifier, revision_identifier,
@@ -18,6 +19,13 @@ from .identifiers import (
 )
 from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes
 
+
+class MissingData(Exception):
+    """Raised by `Content.with_data` when it has no way of fetching the
+    data (but not when fetching the data fails)."""
+    pass
+
+
 SHA1_SIZE = 20
 
 # TODO: Limit this to 20 bytes
@@ -76,9 +84,9 @@ class HashableObject(metaclass=ABCMeta):
 @attr.s(frozen=True)
 class Person(BaseModel):
     """Represents the author/committer of a revision or release."""
-    name = attr.ib(type=bytes)
-    email = attr.ib(type=bytes)
     fullname = attr.ib(type=bytes)
+    name = attr.ib(type=Optional[bytes])
+    email = attr.ib(type=Optional[bytes])
 
 
 @attr.s(frozen=True)
@@ -117,15 +125,31 @@ class TimestampWithTimezone(BaseModel):
             raise ValueError('offset too large: %d minutes' % value)
 
     @classmethod
-    def from_dict(cls, d):
+    def from_dict(cls, obj: Union[Dict, datetime.datetime, int]):
         """Builds a TimestampWithTimezone from any of the formats
         accepted by :func:`swh.model.normalize_timestamp`."""
-        d = normalize_timestamp(d)
+        # TODO: this accept way more types than just dicts; find a better
+        # name
+        d = normalize_timestamp(obj)
         return cls(
             timestamp=Timestamp.from_dict(d['timestamp']),
             offset=d['offset'],
             negative_utc=d['negative_utc'])
 
+    @classmethod
+    def from_datetime(cls, dt: datetime.datetime):
+        return cls.from_dict(dt)
+
+    @classmethod
+    def from_iso8601(cls, s):
+        """Builds a TimestampWithTimezone from an ISO8601-formatted string.
+        """
+        dt = iso8601.parse_date(s)
+        tstz = cls.from_datetime(dt)
+        if dt.tzname() == '-00:00':
+            tstz = attr.evolve(tstz, negative_utc=True)
+        return tstz
+
 
 @attr.s(frozen=True)
 class Origin(BaseModel):
@@ -362,6 +386,10 @@ class Directory(BaseModel, HashableObject):
 
 @attr.s(frozen=True)
 class BaseContent(BaseModel):
+    status = attr.ib(
+        type=str,
+        validator=attr.validators.in_(['visible', 'hidden', 'absent']))
+
     def to_dict(self):
         content = super().to_dict()
         if content['ctime'] is None:
@@ -384,6 +412,10 @@ class BaseContent(BaseModel):
             raise ValueError('{} is not a valid hash name.'.format(hash_name))
         return getattr(self, hash_name)
 
+    def hashes(self) -> Dict[str, bytes]:
+        """Returns a dictionary {hash_name: hash_value}"""
+        return {algo: getattr(self, algo) for algo in DEFAULT_ALGORITHMS}
+
 
 @attr.s(frozen=True)
 class Content(BaseContent):
@@ -398,8 +430,8 @@ class Content(BaseContent):
         type=str,
         default='visible',
         validator=attr.validators.in_(['visible', 'hidden']))
-    data = attr.ib(type=Optional[bytes],
-                   default=None)
+
+    data = attr.ib(type=Optional[bytes], default=None)
 
     ctime = attr.ib(type=Optional[datetime.datetime],
                     default=None)
@@ -420,6 +452,16 @@ class Content(BaseContent):
     def from_dict(cls, d):
         return super().from_dict(d, use_subclass=False)
 
+    def with_data(self) -> 'Content':
+        """Loads the `data` attribute; meaning that it is guaranteed not to
+        be None after this call.
+
+        This call is almost a no-op, but subclasses may overload this method
+        to lazy-load data (eg. from disk or objstorage)."""
+        if self.data is None:
+            raise MissingData('Content data is None.')
+        return self
+
 
 @attr.s(frozen=True)
 class SkippedContent(BaseContent):
@@ -428,7 +470,7 @@ class SkippedContent(BaseContent):
     sha256 = attr.ib(type=Optional[bytes])
     blake2s256 = attr.ib(type=Optional[bytes])
 
-    length = attr.ib(type=int)
+    length = attr.ib(type=Optional[int])
 
     status = attr.ib(
         type=str,
diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py
index 7b21d20e3af2ccb31aca3607e635845f50592c7d..d9881a1529e46c71f5484607dab5cb68b0803251 100644
--- a/swh/model/tests/test_from_disk.py
+++ b/swh/model/tests/test_from_disk.py
@@ -12,8 +12,11 @@ import unittest
 from typing import ClassVar, Optional
 
 from swh.model import from_disk
-from swh.model.from_disk import Content, DentryPerms, Directory
+from swh.model.from_disk import (
+    Content, DentryPerms, Directory, DiskBackedContent
+)
 from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
+from swh.model import model
 
 TEST_DATA = os.path.join(os.path.dirname(__file__), 'data')
 
@@ -48,6 +51,57 @@ class ModeToPerms(unittest.TestCase):
             self.assertEqual(perm, from_disk.mode_to_perms(fmode))
 
 
+class TestDiskBackedContent(unittest.TestCase):
+    def test_with_data(self):
+        expected_content = model.Content(
+            length=42, status='visible', data=b'foo bar',
+            sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+        with tempfile.NamedTemporaryFile(mode='w+b') as fd:
+            content = DiskBackedContent(
+                length=42, status='visible', path=fd.name,
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+            fd.write(b'foo bar')
+            fd.seek(0)
+            content_with_data = content.with_data()
+
+        assert expected_content == content_with_data
+
+    def test_lazy_data(self):
+        with tempfile.NamedTemporaryFile(mode='w+b') as fd:
+            fd.write(b'foo')
+            fd.seek(0)
+            content = DiskBackedContent(
+                length=42, status='visible', path=fd.name,
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+            fd.write(b'bar')
+            fd.seek(0)
+            content_with_data = content.with_data()
+            fd.write(b'baz')
+            fd.seek(0)
+
+        assert content_with_data.data == b'bar'
+
+    def test_with_data_cannot_read(self):
+        with tempfile.NamedTemporaryFile(mode='w+b') as fd:
+            content = DiskBackedContent(
+                length=42, status='visible', path=fd.name,
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+
+        with pytest.raises(OSError):
+            content.with_data()
+
+    def test_missing_path(self):
+        with pytest.raises(TypeError):
+            DiskBackedContent(
+                length=42, status='visible',
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+
+        with pytest.raises(TypeError):
+            DiskBackedContent(
+                length=42, status='visible', path=None,
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+
+
 class DataMixin:
     maxDiff = None  # type: ClassVar[Optional[int]]
 
@@ -102,7 +156,6 @@ class DataMixin:
 
         self.specials = {
             b'fifo': os.mkfifo,
-            b'devnull': lambda path: os.mknod(path, device=os.makedev(1, 3)),
         }
 
         self.empty_content = {
@@ -402,19 +455,19 @@ class DataMixin:
     def tearDown(self):
         self.tmpdir.cleanup()
 
-    def assertContentEqual(self, left, right, *, check_data=False,  # noqa
+    def assertContentEqual(self, left, right, *,  # noqa
                            check_path=False):
         if not isinstance(left, Content):
             raise ValueError('%s is not a Content' % left)
         if isinstance(right, Content):
             right = right.get_data()
 
+        # Compare dictionaries
+
         keys = DEFAULT_ALGORITHMS | {
             'length',
             'perms',
         }
-        if check_data:
-            keys |= {'data'}
         if check_path:
             keys |= {'path'}
 
@@ -449,7 +502,10 @@ class DataMixin:
         if isinstance(right, Directory):
             right = right.get_data()
 
-        return self.assertCountEqual(left.entries, right['entries'])
+        assert left.entries == right['entries']
+        assert left.hash == right['id']
+
+        assert left.to_model() == model.Directory.from_dict(right)
 
     def make_contents(self, directory):
         for filename, content in self.contents.items():
@@ -499,6 +555,19 @@ class SymlinkToContent(DataMixin, unittest.TestCase):
             conv_content = Content.from_symlink(path=path, mode=perms)
             self.assertContentEqual(conv_content, symlink)
 
+    def test_symlink_to_base_model(self):
+        for filename, symlink in self.symlinks.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            perms = 0o120000
+            model_content = \
+                Content.from_symlink(path=path, mode=perms).to_model()
+
+            right = symlink.copy()
+            for key in ('perms', 'path', 'mode'):
+                right.pop(key, None)
+            right['status'] = 'visible'
+            assert model_content == model.Content.from_dict(right)
+
 
 class FileToContent(DataMixin, unittest.TestCase):
     def setUp(self):
@@ -507,34 +576,128 @@ class FileToContent(DataMixin, unittest.TestCase):
         self.make_symlinks(self.tmpdir_name)
         self.make_specials(self.tmpdir_name)
 
+    def test_symlink_to_content(self):
+        for filename, symlink in self.symlinks.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            conv_content = Content.from_file(path=path)
+            self.assertContentEqual(conv_content, symlink)
+
     def test_file_to_content(self):
-        # Check whether loading the data works
-        for data in [True, False]:
+        for filename, content in self.contents.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            conv_content = Content.from_file(path=path)
+            self.assertContentEqual(conv_content, content)
+
+    def test_special_to_content(self):
+        for filename in self.specials:
+            path = os.path.join(self.tmpdir_name, filename)
+            conv_content = Content.from_file(path=path)
+            self.assertContentEqual(conv_content, self.empty_content)
+
+        for path in ['/dev/null', '/dev/zero']:
+            # use the special device path directly (do not clobber it)
+            conv_content = Content.from_file(path=path)
+            self.assertContentEqual(conv_content, self.empty_content)
+
+    def test_symlink_to_content_model(self):
+        for filename, symlink in self.symlinks.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            model_content = Content.from_file(path=path).to_model()
+
+            right = symlink.copy()
+            for key in ('perms', 'path', 'mode'):
+                right.pop(key, None)
+            right['status'] = 'visible'
+            assert model_content == model.Content.from_dict(right)
+
+    def test_file_to_content_model(self):
+        for filename, content in self.contents.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            model_content = Content.from_file(path=path).to_model()
+
+            right = content.copy()
+            for key in ('perms', 'mode'):
+                right.pop(key, None)
+            assert model_content.with_data() == model.Content.from_dict(right)
+
+            right['path'] = path
+            del right['data']
+            assert model_content == DiskBackedContent.from_dict(right)
+
+    def test_special_to_content_model(self):
+        for filename in self.specials:
+            path = os.path.join(self.tmpdir_name, filename)
+            model_content = Content.from_file(path=path).to_model()
+
+            right = self.empty_content.copy()
+            for key in ('perms', 'path', 'mode'):
+                right.pop(key, None)
+            right['status'] = 'visible'
+            assert model_content == model.Content.from_dict(right)
+
+        for path in ['/dev/null', '/dev/zero']:
+            model_content = Content.from_file(path=path).to_model()
+
+            right = self.empty_content.copy()
+            for key in ('perms', 'path', 'mode'):
+                right.pop(key, None)
+            right['status'] = 'visible'
+            assert model_content == model.Content.from_dict(right)
+
+    def test_symlink_max_length(self):
+        for max_content_length in [4, 10]:
             for filename, symlink in self.symlinks.items():
                 path = os.path.join(self.tmpdir_name, filename)
-                conv_content = Content.from_file(path=path, data=data)
-                self.assertContentEqual(conv_content, symlink, check_data=data)
+                content = Content.from_file(path=path)
+                if content.data['length'] > max_content_length:
+                    with pytest.raises(Exception, match='too large'):
+                        Content.from_file(
+                            path=path,
+                            max_content_length=max_content_length)
+                else:
+                    limited_content = Content.from_file(
+                        path=path,
+                        max_content_length=max_content_length)
+                    assert content == limited_content
 
+    def test_file_max_length(self):
+        for max_content_length in [2, 4]:
             for filename, content in self.contents.items():
                 path = os.path.join(self.tmpdir_name, filename)
-                conv_content = Content.from_file(path=path, data=data)
-                self.assertContentEqual(conv_content, content, check_data=data)
+                content = Content.from_file(path=path)
+                limited_content = Content.from_file(
+                    path=path,
+                    max_content_length=max_content_length)
+                assert content.data['length'] == limited_content.data['length']
+                assert content.data['status'] == 'visible'
+                if content.data['length'] > max_content_length:
+                    assert limited_content.data['status'] == 'absent'
+                    assert limited_content.data['reason'] \
+                        == 'Content too large'
+                else:
+                    assert limited_content.data['status'] == 'visible'
 
+    def test_special_file_max_length(self):
+        for max_content_length in [None, 0, 1]:
             for filename in self.specials:
                 path = os.path.join(self.tmpdir_name, filename)
-                conv_content = Content.from_file(path=path, data=data)
-                self.assertContentEqual(conv_content, self.empty_content)
+                content = Content.from_file(path=path)
+                limited_content = Content.from_file(
+                    path=path,
+                    max_content_length=max_content_length)
+                assert limited_content == content
 
     def test_file_to_content_with_path(self):
         for filename, content in self.contents.items():
             content_w_path = content.copy()
             path = os.path.join(self.tmpdir_name, filename)
             content_w_path['path'] = path
-            conv_content = Content.from_file(path=path, save_path=True)
+            conv_content = Content.from_file(path=path)
             self.assertContentEqual(conv_content, content_w_path,
                                     check_path=True)
 
 
+@pytest.mark.fs
 class DirectoryToObjects(DataMixin, unittest.TestCase):
     def setUp(self):
         super().setUp()
@@ -685,6 +848,18 @@ class DirectoryToObjects(DataMixin, unittest.TestCase):
                          len(self.contents)
                          + 1)
 
+    def test_directory_entry_order(self):
+        with tempfile.TemporaryDirectory() as dirname:
+            dirname = os.fsencode(dirname)
+            open(os.path.join(dirname, b'foo.'), 'a')
+            open(os.path.join(dirname, b'foo0'), 'a')
+            os.mkdir(os.path.join(dirname, b'foo'))
+
+            directory = Directory.from_disk(path=dirname)
+
+        assert [entry['name'] for entry in directory.entries] \
+            == [b'foo.', b'foo', b'foo0']
+
 
 @pytest.mark.fs
 class TarballTest(DataMixin, unittest.TestCase):
@@ -697,12 +872,12 @@ class TarballTest(DataMixin, unittest.TestCase):
             path=os.path.join(self.tmpdir_name, b'sample-folder')
         )
 
-        for name, data in self.tarball_contents.items():
+        for name, expected in self.tarball_contents.items():
             obj = directory[name]
             if isinstance(obj, Content):
-                self.assertContentEqual(obj, data)
+                self.assertContentEqual(obj, expected)
             elif isinstance(obj, Directory):
-                self.assertDirectoryEqual(obj, data)
+                self.assertDirectoryEqual(obj, expected)
             else:
                 raise self.failureException('Unknown type for %s' % obj)
 
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index bddf0bca9fcec83e7b6b8643aec00dfc62997109..d5f0f1d46ea103c875c7f3b61f587a53359e3d2d 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -113,7 +113,7 @@ class ContentIdentifier(unittest.TestCase):
 
 
 directory_example = {
-    'id': 'c2e41aae41ac17bd4a650770d6ee77f62e52235b',
+    'id': 'd7ed3d2c31d608823be58b1cbe57605310615231',
     'entries': [
         {
             'type': 'file',
@@ -198,7 +198,28 @@ directory_example = {
             'perms': 57344,
             'name': b'will_paginate',
             'target': '3d531e169db92a16a9a8974f0ae6edf52e52659e'
-        }
+        },
+
+        # in git order, the dir named "order" should be between the files
+        # named "order." and "order0"
+        {
+            'type': 'dir',
+            'perms': 16384,
+            'name': b'order',
+            'target': '62cdb7020ff920e5aa642c3d4066950dd1f01f4d'
+        },
+        {
+            'type': 'file',
+            'perms': 16384,
+            'name': b'order.',
+            'target': '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
+        },
+        {
+            'type': 'file',
+            'perms': 16384,
+            'name': b'order0',
+            'target': 'bbe960a25ea311d21d40669e93df2003ba9b90a2'
+        },
     ],
 }
 
@@ -217,6 +238,13 @@ class DirectoryIdentifier(unittest.TestCase):
             identifiers.directory_identifier(self.directory),
             self.directory['id'])
 
+    def test_dir_identifier_entry_order(self):
+        # Reverse order of entries, check the id is still the same.
+        directory = {'entries': reversed(self.directory['entries'])}
+        self.assertEqual(
+            identifiers.directory_identifier(directory),
+            self.directory['id'])
+
     def test_dir_identifier_empty_directory(self):
         self.assertEqual(
             identifiers.directory_identifier(self.empty_directory),
diff --git a/swh/model/tests/test_merkle.py b/swh/model/tests/test_merkle.py
index 8b1180a4094c19005b19ea52d8879fac9ac405fb..734f7c036143163a24b7e9c9be3be9103d6070fa 100644
--- a/swh/model/tests/test_merkle.py
+++ b/swh/model/tests/test_merkle.py
@@ -46,6 +46,14 @@ class TestMerkleLeaf(unittest.TestCase):
         self.data = {'value': b'value'}
         self.instance = MerkleTestLeaf(self.data)
 
+    def test_equality(self):
+        leaf1 = MerkleTestLeaf(self.data)
+        leaf2 = MerkleTestLeaf(self.data)
+        leaf3 = MerkleTestLeaf({})
+
+        self.assertEqual(leaf1, leaf2)
+        self.assertNotEqual(leaf1, leaf3)
+
     def test_hash(self):
         self.assertEqual(self.instance.compute_hash_called, 0)
         instance_hash = self.instance.hash
@@ -114,6 +122,20 @@ class TestMerkleNode(unittest.TestCase):
                     node2[j] = node3
                     self.nodes[value3] = node3
 
+    def test_equality(self):
+        node1 = merkle.MerkleNode({'foo': b'bar'})
+        node2 = merkle.MerkleNode({'foo': b'bar'})
+        node3 = merkle.MerkleNode({})
+
+        self.assertEqual(node1, node2)
+        self.assertNotEqual(node1, node3, node1 == node3)
+
+        node1['foo'] = node3
+        self.assertNotEqual(node1, node2)
+
+        node2['foo'] = node3
+        self.assertEqual(node1, node2)
+
     def test_hash(self):
         for node in self.nodes.values():
             self.assertEqual(node.compute_hash_called, 0)
@@ -162,6 +184,10 @@ class TestMerkleNode(unittest.TestCase):
         collected2 = self.root.collect()
         self.assertEqual(collected2, {})
 
+    def test_iter_tree(self):
+        nodes = list(self.root.iter_tree())
+        self.assertCountEqual(nodes, self.nodes.values())
+
     def test_get(self):
         for key in (b'a', b'b', b'c'):
             self.assertEqual(self.root[key], self.nodes[b'root/' + key])
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
index 5560127ed87e1f7e961c25e00e9d8c39db492d07..a97c3926b7c3d500b3201ff63fee6c312b755790 100644
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -4,10 +4,16 @@
 # See top-level LICENSE file for more information
 
 import copy
+import datetime
 
 from hypothesis import given
+import pytest
 
-from swh.model.model import Content, Directory, Revision, Release, Snapshot
+from swh.model.model import (
+    Content, Directory, Revision, Release, Snapshot,
+    Timestamp, TimestampWithTimezone,
+    MissingData,
+)
 from swh.model.hashutil import hash_to_bytes
 from swh.model.hypothesis_strategies import objects, origins, origin_visits
 from swh.model.identifiers import (
@@ -54,6 +60,53 @@ def test_todict_origin_visits(origin_visit):
     assert origin_visit == type(origin_visit).from_dict(obj)
 
 
+def test_timestampwithtimezone_from_datetime():
+    tz = datetime.timezone(datetime.timedelta(minutes=+60))
+    date = datetime.datetime(
+        2020, 2, 27, 14, 39, 19, tzinfo=tz)
+
+    tstz = TimestampWithTimezone.from_datetime(date)
+
+    assert tstz == TimestampWithTimezone(
+        timestamp=Timestamp(
+            seconds=1582810759,
+            microseconds=0,
+        ),
+        offset=60,
+        negative_utc=False,
+    )
+
+
+def test_timestampwithtimezone_from_iso8601():
+    date = '2020-02-27 14:39:19.123456+0100'
+
+    tstz = TimestampWithTimezone.from_iso8601(date)
+
+    assert tstz == TimestampWithTimezone(
+        timestamp=Timestamp(
+            seconds=1582810759,
+            microseconds=123456,
+        ),
+        offset=60,
+        negative_utc=False,
+    )
+
+
+def test_timestampwithtimezone_from_iso8601_negative_utc():
+    date = '2020-02-27 13:39:19-0000'
+
+    tstz = TimestampWithTimezone.from_iso8601(date)
+
+    assert tstz == TimestampWithTimezone(
+        timestamp=Timestamp(
+            seconds=1582810759,
+            microseconds=0,
+        ),
+        offset=0,
+        negative_utc=True,
+    )
+
+
 def test_content_get_hash():
     hashes = dict(
         sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
@@ -62,6 +115,28 @@ def test_content_get_hash():
         assert c.get_hash(hash_name) == hash_
 
 
+def test_content_hashes():
+    hashes = dict(
+        sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+    c = Content(length=42, status='visible', **hashes)
+    assert c.hashes() == hashes
+
+
+def test_content_data():
+    c = Content(
+        length=42, status='visible', data=b'foo',
+        sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+    assert c.with_data() == c
+
+
+def test_content_data_missing():
+    c = Content(
+        length=42, status='visible',
+        sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+    with pytest.raises(MissingData):
+        c.with_data()
+
+
 def test_directory_model_id_computation():
     dir_dict = dict(directory_example)
     del dir_dict['id']
diff --git a/version.txt b/version.txt
index fc9120ade4f5e100bf204c973fc37610cb316167..1638fac856587a23fc6b1556404dc62814899986 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.56-0-gfcfbd4d
\ No newline at end of file
+v0.0.57-0-gf7f18a3
\ No newline at end of file