diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index e1afc6b390620edd141c38833bc06f51dd980ae5..583df11cf3cccb4a4dfe4fc4356d67034bb8d637 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -7,15 +7,36 @@ import enum
 import os
 import stat
 
-from typing import List
+import attr
+from typing import List, Optional
 
-from .hashutil import MultiHash, HASH_BLOCK_SIZE
+from .hashutil import MultiHash
 from .merkle import MerkleLeaf, MerkleNode
 from .identifiers import (
     directory_entry_sort_key, directory_identifier,
     identifier_to_bytes as id_to_bytes,
     identifier_to_str as id_to_str,
 )
+from . import model
+
+
+@attr.s
+class DiskBackedContent(model.Content):
+    """Subclass of Content, which allows lazy-loading data from the disk."""
+    path = attr.ib(type=Optional[bytes], default=None)
+
+    def __attrs_post_init__(self):
+        if self.path is None:
+            raise TypeError('path must not be None.')
+
+    def with_data(self) -> model.Content:
+        args = self.to_dict()
+        del args['path']
+        assert self.path is not None
+        with open(self.path, 'rb') as fd:
+            return model.Content.from_dict({
+                **args,
+                'data': fd.read()})
 
 
 class DentryPerms(enum.IntEnum):
@@ -94,8 +115,7 @@ class Content(MerkleLeaf):
 
     @classmethod
     def from_file(
-            cls, *, path, data=False, save_path=False,
-            max_content_length=None):
+            cls, *, path, max_content_length=None):
         """Compute the Software Heritage content entry corresponding to an
         on-disk file.
 
@@ -104,9 +124,6 @@ class Content(MerkleLeaf):
         - using the content as a directory entry in a directory
 
         Args:
-          path (bytes): path to the file for which we're computing the
-            content entry
-          data (bool): add the file data to the entry
           save_path (bool): add the file path to the entry
           max_content_length (Optional[int]): if given, all contents larger
             than this will be skipped.
@@ -137,36 +154,23 @@ class Content(MerkleLeaf):
 
         if too_large:
             skip_reason = 'Content too large'
-        elif not data:
-            skip_reason = 'Skipping file content'
         else:
             skip_reason = None
 
+        hashes = MultiHash.from_path(path).digest()
         if skip_reason:
             ret = {
-                **MultiHash.from_path(path).digest(),
+                **hashes,
                 'status': 'absent',
                 'reason': skip_reason,
             }
         else:
-            h = MultiHash(length=length)
-            chunks = []
-            with open(path, 'rb') as fobj:
-                while True:
-                    chunk = fobj.read(HASH_BLOCK_SIZE)
-                    if not chunk:
-                        break
-                    h.update(chunk)
-                    chunks.append(chunk)
-
             ret = {
-                **h.digest(),
+                **hashes,
                 'status': 'visible',
-                'data': b''.join(chunks),
             }
 
-        if save_path:
-            ret['path'] = path
+        ret['path'] = path
         ret['perms'] = mode_to_perms(mode)
         ret['length'] = length
 
@@ -179,6 +183,18 @@ class Content(MerkleLeaf):
     def compute_hash(self):
         return self.data['sha1_git']
 
+    def to_model(self) -> model.BaseContent:
+        """Builds a `model.BaseContent` object based on this leaf."""
+        data = self.get_data().copy()
+        data.pop('perms', None)
+        if data['status'] == 'absent':
+            data.pop('path', None)
+            return model.SkippedContent.from_dict(data)
+        elif 'data' in data:
+            return model.Content.from_dict(data)
+        else:
+            return DiskBackedContent.from_dict(data)
+
 
 def accept_all_directories(dirname, entries):
     """Default filter for :func:`Directory.from_disk` accepting all
@@ -250,7 +266,7 @@ class Directory(MerkleNode):
     type = 'directory'
 
     @classmethod
-    def from_disk(cls, *, path, data=False, save_path=False,
+    def from_disk(cls, *, path,
                   dir_filter=accept_all_directories,
                   max_content_length=None):
         """Compute the Software Heritage objects for a given directory tree
@@ -278,8 +294,7 @@ class Directory(MerkleNode):
                 path = os.path.join(root, name)
                 if not os.path.isdir(path) or os.path.islink(path):
                     content = Content.from_file(
-                        path=path, data=data, save_path=save_path,
-                        max_content_length=max_content_length)
+                        path=path, max_content_length=max_content_length)
                     entries[name] = content
                 else:
                     if dir_filter(name, dirs[path].entries):
@@ -338,6 +353,11 @@ class Directory(MerkleNode):
     def compute_hash(self):
         return id_to_bytes(directory_identifier({'entries': self.entries}))
 
+    def to_model(self) -> model.Directory:
+        """Builds a `model.Directory` object based on this node;
+        ignoring its children."""
+        return model.Directory.from_dict(self.get_data())
+
     def __getitem__(self, key):
         if not isinstance(key, bytes):
             raise ValueError('Can only get a bytes from Directory')
diff --git a/swh/model/model.py b/swh/model/model.py
index 26accb65b0f584bc65fd4dc2ac7e2ce9be5cc3fd..d3c9c7d570015f94eb1df164ee11770fa295b754 100644
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -18,6 +18,13 @@ from .identifiers import (
 )
 from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes
 
+
+class MissingData(Exception):
+    """Raised by `Content.with_data` when it has no way of fetching the
+    data (but not when fetching the data fails)."""
+    pass
+
+
 SHA1_SIZE = 20
 
 # TODO: Limit this to 20 bytes
@@ -362,6 +369,10 @@ class Directory(BaseModel, HashableObject):
 
 @attr.s(frozen=True)
 class BaseContent(BaseModel):
+    status = attr.ib(
+        type=str,
+        validator=attr.validators.in_(['visible', 'hidden', 'absent']))
+
     def to_dict(self):
         content = super().to_dict()
         if content['ctime'] is None:
@@ -402,8 +413,8 @@ class Content(BaseContent):
         type=str,
         default='visible',
         validator=attr.validators.in_(['visible', 'hidden']))
-    data = attr.ib(type=Optional[bytes],
-                   default=None)
+
+    data = attr.ib(type=Optional[bytes], default=None)
 
     ctime = attr.ib(type=Optional[datetime.datetime],
                     default=None)
@@ -424,6 +435,16 @@ class Content(BaseContent):
     def from_dict(cls, d):
         return super().from_dict(d, use_subclass=False)
 
+    def with_data(self) -> 'Content':
+        """Loads the `data` attribute; meaning that it is guaranteed not to
+        be None after this call.
+
+        This call is almost a no-op, but subclasses may overload this method
+        to lazy-load data (eg. from disk or objstorage)."""
+        if self.data is None:
+            raise MissingData('Content data is None.')
+        return self
+
 
 @attr.s(frozen=True)
 class SkippedContent(BaseContent):
@@ -432,7 +453,7 @@ class SkippedContent(BaseContent):
     sha256 = attr.ib(type=Optional[bytes])
     blake2s256 = attr.ib(type=Optional[bytes])
 
-    length = attr.ib(type=int)
+    length = attr.ib(type=Optional[int])
 
     status = attr.ib(
         type=str,
diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py
index 137ac148959840665d55aaaa01211582f61f91d2..d9881a1529e46c71f5484607dab5cb68b0803251 100644
--- a/swh/model/tests/test_from_disk.py
+++ b/swh/model/tests/test_from_disk.py
@@ -12,8 +12,11 @@ import unittest
 from typing import ClassVar, Optional
 
 from swh.model import from_disk
-from swh.model.from_disk import Content, DentryPerms, Directory
+from swh.model.from_disk import (
+    Content, DentryPerms, Directory, DiskBackedContent
+)
 from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
+from swh.model import model
 
 TEST_DATA = os.path.join(os.path.dirname(__file__), 'data')
 
@@ -48,6 +51,57 @@ class ModeToPerms(unittest.TestCase):
             self.assertEqual(perm, from_disk.mode_to_perms(fmode))
 
 
+class TestDiskBackedContent(unittest.TestCase):
+    def test_with_data(self):
+        expected_content = model.Content(
+            length=42, status='visible', data=b'foo bar',
+            sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+        with tempfile.NamedTemporaryFile(mode='w+b') as fd:
+            content = DiskBackedContent(
+                length=42, status='visible', path=fd.name,
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+            fd.write(b'foo bar')
+            fd.seek(0)
+            content_with_data = content.with_data()
+
+        assert expected_content == content_with_data
+
+    def test_lazy_data(self):
+        with tempfile.NamedTemporaryFile(mode='w+b') as fd:
+            fd.write(b'foo')
+            fd.seek(0)
+            content = DiskBackedContent(
+                length=42, status='visible', path=fd.name,
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+            fd.write(b'bar')
+            fd.seek(0)
+            content_with_data = content.with_data()
+            fd.write(b'baz')
+            fd.seek(0)
+
+        assert content_with_data.data == b'bar'
+
+    def test_with_data_cannot_read(self):
+        with tempfile.NamedTemporaryFile(mode='w+b') as fd:
+            content = DiskBackedContent(
+                length=42, status='visible', path=fd.name,
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+
+        with pytest.raises(OSError):
+            content.with_data()
+
+    def test_missing_path(self):
+        with pytest.raises(TypeError):
+            DiskBackedContent(
+                length=42, status='visible',
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+
+        with pytest.raises(TypeError):
+            DiskBackedContent(
+                length=42, status='visible', path=None,
+                sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+
+
 class DataMixin:
     maxDiff = None  # type: ClassVar[Optional[int]]
 
@@ -401,19 +455,19 @@ class DataMixin:
     def tearDown(self):
         self.tmpdir.cleanup()
 
-    def assertContentEqual(self, left, right, *, check_data=False,  # noqa
+    def assertContentEqual(self, left, right, *,  # noqa
                            check_path=False):
         if not isinstance(left, Content):
             raise ValueError('%s is not a Content' % left)
         if isinstance(right, Content):
             right = right.get_data()
 
+        # Compare dictionaries
+
         keys = DEFAULT_ALGORITHMS | {
             'length',
             'perms',
         }
-        if check_data:
-            keys |= {'data'}
         if check_path:
             keys |= {'path'}
 
@@ -449,6 +503,9 @@ class DataMixin:
             right = right.get_data()
 
         assert left.entries == right['entries']
+        assert left.hash == right['id']
+
+        assert left.to_model() == model.Directory.from_dict(right)
 
     def make_contents(self, directory):
         for filename, content in self.contents.items():
@@ -498,6 +555,19 @@ class SymlinkToContent(DataMixin, unittest.TestCase):
             conv_content = Content.from_symlink(path=path, mode=perms)
             self.assertContentEqual(conv_content, symlink)
 
+    def test_symlink_to_base_model(self):
+        for filename, symlink in self.symlinks.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            perms = 0o120000
+            model_content = \
+                Content.from_symlink(path=path, mode=perms).to_model()
+
+            right = symlink.copy()
+            for key in ('perms', 'path', 'mode'):
+                right.pop(key, None)
+            right['status'] = 'visible'
+            assert model_content == model.Content.from_dict(right)
+
 
 class FileToContent(DataMixin, unittest.TestCase):
     def setUp(self):
@@ -507,45 +577,86 @@ class FileToContent(DataMixin, unittest.TestCase):
         self.make_specials(self.tmpdir_name)
 
     def test_symlink_to_content(self):
-        # Check whether loading the data works
-        for data in [True, False]:
-            for filename, symlink in self.symlinks.items():
-                path = os.path.join(self.tmpdir_name, filename)
-                conv_content = Content.from_file(path=path, data=data)
-                self.assertContentEqual(conv_content, symlink, check_data=data)
+        for filename, symlink in self.symlinks.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            conv_content = Content.from_file(path=path)
+            self.assertContentEqual(conv_content, symlink)
 
     def test_file_to_content(self):
-        for data in [True, False]:
-            for filename, content in self.contents.items():
-                path = os.path.join(self.tmpdir_name, filename)
-                conv_content = Content.from_file(path=path, data=data)
-                self.assertContentEqual(conv_content, content, check_data=data)
+        for filename, content in self.contents.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            conv_content = Content.from_file(path=path)
+            self.assertContentEqual(conv_content, content)
 
     def test_special_to_content(self):
-        for data in [True, False]:
-            for filename in self.specials:
-                path = os.path.join(self.tmpdir_name, filename)
-                conv_content = Content.from_file(path=path, data=data)
-                self.assertContentEqual(conv_content, self.empty_content)
+        for filename in self.specials:
+            path = os.path.join(self.tmpdir_name, filename)
+            conv_content = Content.from_file(path=path)
+            self.assertContentEqual(conv_content, self.empty_content)
 
-            for path in ['/dev/null', '/dev/zero']:
-                path = os.path.join(self.tmpdir_name, filename)
-                conv_content = Content.from_file(path=path, data=data)
-                self.assertContentEqual(conv_content, self.empty_content)
+        for path in ['/dev/null', '/dev/zero']:
+            path = os.path.join(self.tmpdir_name, filename)
+            conv_content = Content.from_file(path=path)
+            self.assertContentEqual(conv_content, self.empty_content)
+
+    def test_symlink_to_content_model(self):
+        for filename, symlink in self.symlinks.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            model_content = Content.from_file(path=path).to_model()
+
+            right = symlink.copy()
+            for key in ('perms', 'path', 'mode'):
+                right.pop(key, None)
+            right['status'] = 'visible'
+            assert model_content == model.Content.from_dict(right)
+
+    def test_file_to_content_model(self):
+        for filename, content in self.contents.items():
+            path = os.path.join(self.tmpdir_name, filename)
+            model_content = Content.from_file(path=path).to_model()
+
+            right = content.copy()
+            for key in ('perms', 'mode'):
+                right.pop(key, None)
+            assert model_content.with_data() == model.Content.from_dict(right)
+
+            right['path'] = path
+            del right['data']
+            assert model_content == DiskBackedContent.from_dict(right)
+
+    def test_special_to_content_model(self):
+        for filename in self.specials:
+            path = os.path.join(self.tmpdir_name, filename)
+            model_content = Content.from_file(path=path).to_model()
+
+            right = self.empty_content.copy()
+            for key in ('perms', 'path', 'mode'):
+                right.pop(key, None)
+            right['status'] = 'visible'
+            assert model_content == model.Content.from_dict(right)
+
+        for path in ['/dev/null', '/dev/zero']:
+            model_content = Content.from_file(path=path).to_model()
+
+            right = self.empty_content.copy()
+            for key in ('perms', 'path', 'mode'):
+                right.pop(key, None)
+            right['status'] = 'visible'
+            assert model_content == model.Content.from_dict(right)
 
     def test_symlink_max_length(self):
         for max_content_length in [4, 10]:
             for filename, symlink in self.symlinks.items():
                 path = os.path.join(self.tmpdir_name, filename)
-                content = Content.from_file(path=path, data=True)
+                content = Content.from_file(path=path)
                 if content.data['length'] > max_content_length:
                     with pytest.raises(Exception, match='too large'):
                         Content.from_file(
-                            path=path, data=True,
+                            path=path,
                             max_content_length=max_content_length)
                 else:
                     limited_content = Content.from_file(
-                        path=path, data=True,
+                        path=path,
                         max_content_length=max_content_length)
                     assert content == limited_content
 
@@ -553,9 +664,9 @@ class FileToContent(DataMixin, unittest.TestCase):
         for max_content_length in [2, 4]:
             for filename, content in self.contents.items():
                 path = os.path.join(self.tmpdir_name, filename)
-                content = Content.from_file(path=path, data=True)
+                content = Content.from_file(path=path)
                 limited_content = Content.from_file(
-                    path=path, data=True,
+                    path=path,
                     max_content_length=max_content_length)
                 assert content.data['length'] == limited_content.data['length']
                 assert content.data['status'] == 'visible'
@@ -565,15 +676,14 @@ class FileToContent(DataMixin, unittest.TestCase):
                         == 'Content too large'
                 else:
                     assert limited_content.data['status'] == 'visible'
-                    assert limited_content.data['data'] == content.data['data']
 
     def test_special_file_max_length(self):
         for max_content_length in [None, 0, 1]:
             for filename in self.specials:
                 path = os.path.join(self.tmpdir_name, filename)
-                content = Content.from_file(path=path, data=True)
+                content = Content.from_file(path=path)
                 limited_content = Content.from_file(
-                    path=path, data=True,
+                    path=path,
                     max_content_length=max_content_length)
                 assert limited_content == content
 
@@ -582,7 +692,7 @@ class FileToContent(DataMixin, unittest.TestCase):
             content_w_path = content.copy()
             path = os.path.join(self.tmpdir_name, filename)
             content_w_path['path'] = path
-            conv_content = Content.from_file(path=path, save_path=True)
+            conv_content = Content.from_file(path=path)
             self.assertContentEqual(conv_content, content_w_path,
                                     check_path=True)
 
@@ -762,12 +872,12 @@ class TarballTest(DataMixin, unittest.TestCase):
             path=os.path.join(self.tmpdir_name, b'sample-folder')
         )
 
-        for name, data in self.tarball_contents.items():
+        for name, expected in self.tarball_contents.items():
             obj = directory[name]
             if isinstance(obj, Content):
-                self.assertContentEqual(obj, data)
+                self.assertContentEqual(obj, expected)
             elif isinstance(obj, Directory):
-                self.assertDirectoryEqual(obj, data)
+                self.assertDirectoryEqual(obj, expected)
             else:
                 raise self.failureException('Unknown type for %s' % obj)
 
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
index e905725d5624ce1ad311292c9e2111e3b45905a2..82f3dc966457c66bb12d2052f06a5f495bba83a2 100644
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -6,8 +6,10 @@
 import copy
 
 from hypothesis import given
+import pytest
 
 from swh.model.model import Content, Directory, Revision, Release, Snapshot
+from swh.model.model import MissingData
 from swh.model.hashutil import hash_to_bytes
 from swh.model.hypothesis_strategies import objects, origins, origin_visits
 from swh.model.identifiers import (
@@ -69,6 +71,21 @@ def test_content_hashes():
     assert c.hashes() == hashes
 
 
+def test_content_data():
+    c = Content(
+        length=42, status='visible', data=b'foo',
+        sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+    assert c.with_data() == c
+
+
+def test_content_data_missing():
+    c = Content(
+        length=42, status='visible',
+        sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
+    with pytest.raises(MissingData):
+        c.with_data()
+
+
 def test_directory_model_id_computation():
     dir_dict = dict(directory_example)
     del dir_dict['id']