diff --git a/PKG-INFO b/PKG-INFO
index 8b07a8646a12e0deea6602ffbc3799cffea8eba5..b5797c6a895baa9c38fcb8b8f3c171553b5c2124 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,16 +1,14 @@
 Metadata-Version: 2.1
 Name: swh.model
-Version: 6.2.0
+Version: 6.3.0
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
-License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-model
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-model/
-Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
@@ -42,5 +40,3 @@ This module defines the notion of SoftWare Heritage persistent IDentifiers
    $ swh-identify --no-filename /usr/src/linux/kernel/
    swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab
 ```
-
-
diff --git a/pytest.ini b/pytest.ini
index c5186e582bb1a58fec0cb9bb6b7f30ee4210727f..10242f24f0323cac877764b3d4db78b11aa182f7 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -4,3 +4,5 @@ norecursedirs = build docs .*
 markers =
     fs: tests that involve filesystem ios
     requires_optional_deps: tests in test_cli.py that should not run if optional dependencies are not installed
+
+asyncio_mode = strict
diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO
index 8b07a8646a12e0deea6602ffbc3799cffea8eba5..b5797c6a895baa9c38fcb8b8f3c171553b5c2124 100644
--- a/swh.model.egg-info/PKG-INFO
+++ b/swh.model.egg-info/PKG-INFO
@@ -1,16 +1,14 @@
 Metadata-Version: 2.1
 Name: swh.model
-Version: 6.2.0
+Version: 6.3.0
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
-License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-model
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-model/
-Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
@@ -42,5 +40,3 @@ This module defines the notion of SoftWare Heritage persistent IDentifiers
    $ swh-identify --no-filename /usr/src/linux/kernel/
    swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab
 ```
-
-
diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py
index 566aaa364b1823cbce01163d0f6dae9d7d891410..41be6f2ea77ed6304e9435ed72b47a7281f8d9df 100644
--- a/swh/model/git_objects.py
+++ b/swh/model/git_objects.py
@@ -240,10 +240,7 @@ def format_git_object_from_headers(
     if message is not None:
         entries.extend((b"\n", message))
 
-    concatenated_entries = b"".join(entries)
-
-    header = git_object_header(git_type, len(concatenated_entries))
-    return header + concatenated_entries
+    return format_git_object_from_parts(git_type, entries)
 
 
 def format_git_object_from_parts(git_type: str, parts: Iterable[bytes]) -> bytes:
diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index 106e7c0c8e3a342d09cd431c37004b381d2d8926..75d9f8b40d6ea1a5379e96034c68b2f0b227d88b 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -56,7 +56,7 @@ import functools
 import hashlib
 from io import BytesIO
 import os
-from typing import Callable, Dict, Optional
+from typing import Callable, Dict, Optional, Union
 
 ALGORITHMS = set(["sha1", "sha256", "sha1_git", "blake2s256", "blake2b512", "md5"])
 """Hashing algorithms supported by this module"""
@@ -293,7 +293,7 @@ def hash_git_data(data, git_type, base_algo="sha1"):
 
 
 @functools.lru_cache()
-def hash_to_hex(hash):
+def hash_to_hex(hash: Union[str, bytes]) -> str:
     """Converts a hash (in hex or bytes form) to its hexadecimal ascii form
 
     Args:
@@ -309,7 +309,7 @@ def hash_to_hex(hash):
 
 
 @functools.lru_cache()
-def hash_to_bytehex(hash):
+def hash_to_bytehex(hash: bytes) -> bytes:
     """Converts a hash to its hexadecimal bytes representation
 
     Args:
@@ -322,7 +322,7 @@ def hash_to_bytehex(hash):
 
 
 @functools.lru_cache()
-def hash_to_bytes(hash):
+def hash_to_bytes(hash: Union[str, bytes]) -> bytes:
     """Converts a hash (in hex or bytes form) to its raw bytes form
 
     Args:
@@ -338,7 +338,7 @@ def hash_to_bytes(hash):
 
 
 @functools.lru_cache()
-def bytehex_to_hash(hex):
+def bytehex_to_hash(hex: bytes) -> bytes:
     """Converts a hexadecimal bytes representation of a hash to that hash
 
     Args:
diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py
index dabecf962d03c0c7a00895ec215342beb1e31a2a..53c66f00bcfc7789672d4f0f2107b1d8675bcbc9 100644
--- a/swh/model/hypothesis_strategies.py
+++ b/swh/model/hypothesis_strategies.py
@@ -120,22 +120,22 @@ def persons_d(draw):
     return dict(fullname=fullname, name=name, email=email)
 
 
-def persons():
-    return persons_d().map(Person.from_dict)
+def persons(**kwargs):
+    return persons_d(**kwargs).map(Person.from_dict)
 
 
-def timestamps_d():
+def timestamps_d(**kwargs):
     max_seconds = datetime.datetime.max.replace(
         tzinfo=datetime.timezone.utc
     ).timestamp()
     min_seconds = datetime.datetime.min.replace(
         tzinfo=datetime.timezone.utc
     ).timestamp()
-    return builds(
-        dict,
+    defaults = dict(
         seconds=integers(min_seconds, max_seconds),
         microseconds=integers(0, 1000000),
     )
+    return builds(dict, **{**defaults, **kwargs})
 
 
 def timestamps():
@@ -145,6 +145,7 @@ def timestamps():
 @composite
 def timestamps_with_timezone_d(
     draw,
+    *,
     timestamp=timestamps_d(),
     offset=integers(min_value=-14 * 60, max_value=14 * 60),
     negative_utc=booleans(),
@@ -161,35 +162,34 @@ timestamps_with_timezone = timestamps_with_timezone_d().map(
 )
 
 
-def origins_d():
-    return builds(dict, url=iris())
+def origins_d(*, url=iris()):
+    return builds(dict, url=url)
 
 
-def origins():
-    return origins_d().map(Origin.from_dict)
+def origins(**kwargs):
+    return origins_d(**kwargs).map(Origin.from_dict)
 
 
-def origin_visits_d():
-    return builds(
-        dict,
+def origin_visits_d(**kwargs):
+    defaults = dict(
         visit=integers(1, 1000),
         origin=iris(),
         date=aware_datetimes(),
         type=pgsql_text(),
     )
+    return builds(dict, **{**defaults, **kwargs})
 
 
-def origin_visits():
-    return origin_visits_d().map(OriginVisit.from_dict)
+def origin_visits(**kwargs):
+    return origin_visits_d(**kwargs).map(OriginVisit.from_dict)
 
 
 def metadata_dicts():
     return dictionaries(pgsql_text(), pgsql_text())
 
 
-def origin_visit_statuses_d():
-    return builds(
-        dict,
+def origin_visit_statuses_d(**kwargs):
+    defaults = dict(
         visit=integers(1, 1000),
         origin=iris(),
         type=optional(sampled_from(["git", "svn", "pypi", "debian"])),
@@ -200,60 +200,48 @@ def origin_visit_statuses_d():
         snapshot=optional(sha1_git()),
         metadata=optional(metadata_dicts()),
     )
+    return builds(dict, **{**defaults, **kwargs})
 
 
-def origin_visit_statuses():
-    return origin_visit_statuses_d().map(OriginVisitStatus.from_dict)
+def origin_visit_statuses(**kwargs):
+    return origin_visit_statuses_d(**kwargs).map(OriginVisitStatus.from_dict)
 
 
 @composite
-def releases_d(draw):
-    target_type = sampled_from([x.value for x in ObjectType])
-    name = binary()
-    message = optional(binary())
-    synthetic = booleans()
-    target = sha1_git()
-    metadata = optional(revision_metadata())
+def releases_d(draw, **kwargs):
+    defaults = dict(
+        target_type=sampled_from([x.value for x in ObjectType]),
+        name=binary(),
+        message=optional(binary()),
+        synthetic=booleans(),
+        target=sha1_git(),
+        metadata=optional(revision_metadata()),
+        raw_manifest=optional(binary()),
+    )
 
     d = draw(
         one_of(
             # None author/date:
-            builds(
-                dict,
-                name=name,
-                message=message,
-                synthetic=synthetic,
-                author=none(),
-                date=none(),
-                target=target,
-                target_type=target_type,
-                metadata=metadata,
-            ),
+            builds(dict, author=none(), date=none(), **{**defaults, **kwargs}),
             # non-None author/date:
             builds(
                 dict,
-                name=name,
-                message=message,
-                synthetic=synthetic,
                 date=timestamps_with_timezone_d(),
                 author=persons_d(),
-                target=target,
-                target_type=target_type,
-                metadata=metadata,
+                **{**defaults, **kwargs},
             ),
             # it is also possible for date to be None but not author, but let's not
             # overwhelm hypothesis with this edge case
         )
     )
 
-    raw_manifest = draw(optional(binary()))
-    if raw_manifest:
-        d["raw_manifest"] = raw_manifest
+    if d["raw_manifest"] is None:
+        del d["raw_manifest"]
     return d
 
 
-def releases():
-    return releases_d().map(Release.from_dict)
+def releases(**kwargs):
+    return releases_d(**kwargs).map(Release.from_dict)
 
 
 revision_metadata = metadata_dicts
@@ -266,38 +254,36 @@ def extra_headers():
 
 
 @composite
-def revisions_d(draw):
+def revisions_d(draw, **kwargs):
+    defaults = dict(
+        message=optional(binary()),
+        synthetic=booleans(),
+        parents=tuples(sha1_git()),
+        directory=sha1_git(),
+        type=sampled_from([x.value for x in RevisionType]),
+        metadata=optional(revision_metadata()),
+        extra_headers=extra_headers(),
+        raw_manifest=optional(binary()),
+    )
     d = draw(
         one_of(
             # None author/committer/date/committer_date
             builds(
                 dict,
-                message=optional(binary()),
-                synthetic=booleans(),
                 author=none(),
                 committer=none(),
                 date=none(),
                 committer_date=none(),
-                parents=tuples(sha1_git()),
-                directory=sha1_git(),
-                type=sampled_from([x.value for x in RevisionType]),
-                metadata=optional(revision_metadata()),
-                extra_headers=extra_headers(),
+                **{**defaults, **kwargs},
             ),
             # non-None author/committer/date/committer_date
             builds(
                 dict,
-                message=optional(binary()),
-                synthetic=booleans(),
                 author=persons_d(),
                 committer=persons_d(),
                 date=timestamps_with_timezone_d(),
                 committer_date=timestamps_with_timezone_d(),
-                parents=tuples(sha1_git()),
-                directory=sha1_git(),
-                type=sampled_from([x.value for x in RevisionType]),
-                metadata=optional(revision_metadata()),
-                extra_headers=extra_headers(),
+                **{**defaults, **kwargs},
             ),
             # There are many other combinations, but let's not overwhelm hypothesis
             # with these edge cases
@@ -305,67 +291,67 @@ def revisions_d(draw):
     )
     # TODO: metadata['extra_headers'] can have binary keys and values
 
-    raw_manifest = draw(optional(binary()))
-    if raw_manifest:
-        d["raw_manifest"] = raw_manifest
+    if d["raw_manifest"] is None:
+        del d["raw_manifest"]
     return d
 
 
-def revisions():
-    return revisions_d().map(Revision.from_dict)
+def revisions(**kwargs):
+    return revisions_d(**kwargs).map(Revision.from_dict)
 
 
-def directory_entries_d():
+def directory_entries_d(**kwargs):
+    defaults = dict(
+        name=binaries_without_bytes(b"/"),
+        target=sha1_git(),
+    )
     return one_of(
         builds(
             dict,
-            name=binaries_without_bytes(b"/"),
-            target=sha1_git(),
             type=just("file"),
             perms=one_of(
                 integers(min_value=0o100000, max_value=0o100777),  # regular file
                 integers(min_value=0o120000, max_value=0o120777),  # symlink
             ),
+            **{**defaults, **kwargs},
         ),
         builds(
             dict,
-            name=binaries_without_bytes(b"/"),
-            target=sha1_git(),
             type=just("dir"),
             perms=integers(
                 min_value=DentryPerms.directory,
                 max_value=DentryPerms.directory + 0o777,
             ),
+            **{**defaults, **kwargs},
         ),
         builds(
             dict,
-            name=binaries_without_bytes(b"/"),
-            target=sha1_git(),
             type=just("rev"),
             perms=integers(
                 min_value=DentryPerms.revision,
                 max_value=DentryPerms.revision + 0o777,
             ),
+            **{**defaults, **kwargs},
         ),
     )
 
 
-def directory_entries():
-    return directory_entries_d().map(DirectoryEntry)
+def directory_entries(**kwargs):
+    return directory_entries_d(**kwargs).map(DirectoryEntry)
 
 
 @composite
-def directories_d(draw):
+def directories_d(draw, raw_manifest=optional(binary())):
     d = draw(builds(dict, entries=tuples(directory_entries_d())))
 
-    raw_manifest = draw(optional(binary()))
-    if raw_manifest:
-        d["raw_manifest"] = raw_manifest
+    d["raw_manifest"] = draw(raw_manifest)
+    if d["raw_manifest"] is None:
+        del d["raw_manifest"]
     return d
 
 
-def directories():
-    return directories_d().map(Directory.from_dict)
+def directories(**kwargs):
+    return directories_d(**kwargs).map(Directory.from_dict)
 
 
 def contents_d():
@@ -376,21 +362,23 @@ def contents():
     return one_of(present_contents(), skipped_contents())
 
 
-def present_contents_d():
-    return builds(
-        dict,
+def present_contents_d(**kwargs):
+    defaults = dict(
         data=binary(max_size=4096),
         ctime=optional(aware_datetimes()),
         status=one_of(just("visible"), just("hidden")),
     )
+    return builds(dict, **{**defaults, **kwargs})
 
 
-def present_contents():
+def present_contents(**kwargs):
     return present_contents_d().map(lambda d: Content.from_data(**d))
 
 
 @composite
-def skipped_contents_d(draw):
+def skipped_contents_d(
+    draw, reason=pgsql_text(), status=just("absent"), ctime=optional(aware_datetimes())
+):
     result = BaseContent._hash_data(draw(binary(max_size=4096)))
     result.pop("data")
     nullify_attrs = draw(
@@ -398,13 +386,13 @@ def skipped_contents_d(draw):
     )
     for k in nullify_attrs:
         result[k] = None
-    result["reason"] = draw(pgsql_text())
-    result["status"] = "absent"
-    result["ctime"] = draw(optional(aware_datetimes()))
+    result["reason"] = draw(reason)
+    result["status"] = draw(status)
+    result["ctime"] = draw(ctime)
     return result
 
 
-def skipped_contents():
+def skipped_contents(**kwargs):
     return skipped_contents_d().map(SkippedContent.from_dict)
 
 
@@ -492,35 +480,38 @@ def snapshots(*, min_size=0, max_size=100, only_objects=False):
     ).map(Snapshot.from_dict)
 
 
-def metadata_authorities():
-    return builds(MetadataAuthority, url=iris(), metadata=just(None))
+def metadata_authorities(url=iris()):
+    return builds(MetadataAuthority, url=url, metadata=just(None))
 
 
-def metadata_fetchers():
-    return builds(
-        MetadataFetcher,
+def metadata_fetchers(**kwargs):
+    defaults = dict(
         name=text(min_size=1, alphabet=string.printable),
         version=text(
             min_size=1,
             alphabet=string.ascii_letters + string.digits + string.punctuation,
         ),
+    )
+    return builds(
+        MetadataFetcher,
         metadata=just(None),
+        **{**defaults, **kwargs},
     )
 
 
-def raw_extrinsic_metadata():
-    return builds(
-        RawExtrinsicMetadata,
+def raw_extrinsic_metadata(**kwargs):
+    defaults = dict(
         target=extended_swhids(),
         discovery_date=aware_datetimes(),
         authority=metadata_authorities(),
         fetcher=metadata_fetchers(),
         format=text(min_size=1, alphabet=string.printable),
     )
+    return builds(RawExtrinsicMetadata, **{**defaults, **kwargs})
 
 
-def raw_extrinsic_metadata_d():
-    return raw_extrinsic_metadata().map(RawExtrinsicMetadata.to_dict)
+def raw_extrinsic_metadata_d(**kwargs):
+    return raw_extrinsic_metadata(**kwargs).map(RawExtrinsicMetadata.to_dict)
 
 
 def objects(blacklist_types=("origin_visit_status",), split_content=False):
diff --git a/swh/model/model.py b/swh/model/model.py
index 508d41cb46f489671c37cd338c2b9afcb442350f..1073cc61cf4623ba65428b164466b82d4ad35b6c 100644
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -16,10 +16,11 @@ method to convert between them and msgpack-serializable objects.
 """
 
 from abc import ABCMeta, abstractmethod
+import collections
 import datetime
 from enum import Enum
 import hashlib
-from typing import Any, Dict, Iterable, Optional, Tuple, Type, TypeVar, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, TypeVar, Union
 
 import attr
 from attrs_strict import AttributeTypeError
@@ -29,7 +30,7 @@ from typing_extensions import Final
 
 from . import git_objects
 from .collections import ImmutableDict
-from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_hex
+from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytehex, hash_to_hex
 from .swhids import CoreSWHID
 from .swhids import ExtendedObjectType as SwhidExtendedObjectType
 from .swhids import ExtendedSWHID
@@ -266,7 +267,7 @@ class HashableObjectWithManifest(HashableObject):
         attribute is set to an empty value.
         """
         if self.raw_manifest is None:
-            return super().compute_hash()
+            return super().compute_hash()  # calls self._compute_hash_from_attributes()
         else:
             return _compute_hash_from_manifest(self.raw_manifest)
 
@@ -943,12 +944,15 @@ class Revision(HashableObjectWithManifest, BaseModel):
         )
 
 
+_DIR_ENTRY_TYPES = ["file", "dir", "rev"]
+
+
 @attr.s(frozen=True, slots=True)
 class DirectoryEntry(BaseModel):
     object_type: Final = "directory_entry"
 
     name = attr.ib(type=bytes, validator=type_validator())
-    type = attr.ib(type=str, validator=attr.validators.in_(["file", "dir", "rev"]))
+    type = attr.ib(type=str, validator=attr.validators.in_(_DIR_ENTRY_TYPES))
     target = attr.ib(type=Sha1Git, validator=type_validator(), repr=hash_repr)
     perms = attr.ib(type=int, validator=type_validator(), converter=int, repr=oct)
     """Usually one of the values of `swh.model.from_disk.DentryPerms`."""
@@ -996,6 +1000,87 @@ class Directory(HashableObjectWithManifest, BaseModel):
         """Returns a SWHID representing this object."""
         return CoreSWHID(object_type=SwhidObjectType.DIRECTORY, object_id=self.id)
 
+    @classmethod
+    def from_possibly_duplicated_entries(
+        cls,
+        *,
+        entries: Tuple[DirectoryEntry, ...],
+        id: Sha1Git = b"",
+        raw_manifest: Optional[bytes] = None,
+    ) -> Tuple[bool, "Directory"]:
+        """Constructs a ``Directory`` object from a list of entries that may contain
+        duplicated names.
+
+        This is required to represent legacy objects, that were ingested in the
+        storage database before this check was added.
+
+        As it is impossible for a ``Directory`` instances to have more than one entry
+        with a given names, this function computes a ``raw_manifest`` and renames one of
+        the entries before constructing the ``Directory``.
+
+        Returns:
+            ``(is_corrupt, directory)`` where ``is_corrupt`` is True iff some
+            entry names were indeed duplicated
+        """
+        # First, try building a Directory object normally without any extra computation,
+        # which works the overwhelming majority of the time:
+        try:
+            return (False, Directory(entries=entries, id=id, raw_manifest=raw_manifest))
+        except ValueError:
+            pass
+
+        # If it fails:
+        # 1. compute a raw_manifest if there isn't already one:
+        if raw_manifest is None:
+            # invalid_directory behaves like a Directory object, but without the
+            # duplicated entry check; which allows computing its raw_manifest
+            invalid_directory = type("", (), {})()
+            invalid_directory.entries = entries
+            raw_manifest = git_objects.directory_git_object(invalid_directory)
+
+        # 2. look for duplicated entries:
+        entries_by_name: Dict[
+            bytes, Dict[str, List[DirectoryEntry]]
+        ] = collections.defaultdict(lambda: collections.defaultdict(list))
+        for entry in entries:
+            entries_by_name[entry.name][entry.type].append(entry)
+
+        # 3. strip duplicates
+        deduplicated_entries = []
+        for entry_lists in entries_by_name.values():
+            # We could pick one entry at random to keep the original name; but we try to
+            # "minimize" the impact, by preserving entries of type "rev" first
+            # (because renaming them would likely break git submodules entirely
+            # when this directory is written to disk),
+            # then entries of type "dir" (because renaming them affects the path
+            # of every file in the dir, instead of just one "cnt").
+            dir_entry_types = ("rev", "dir", "file")
+            assert set(dir_entry_types) == set(_DIR_ENTRY_TYPES)
+            picked_winner = False  # when True, all future entries must be renamed
+            for type_ in dir_entry_types:
+                for entry in entry_lists[type_]:
+                    if not picked_winner:
+                        # this is the "most important" entry according to this
+                        # heuristic; it gets to keep its name.
+                        deduplicated_entries.append(entry)
+                        picked_winner = True
+                    else:
+                        # the heuristic already found an entry more important than
+                        # this one; so this one must be renamed to something.
+                        # we pick the beginning of its hash, it should be good enough
+                        # to avoid any conflict.
+                        new_name = (
+                            entry.name + b"_" + hash_to_bytehex(entry.target)[0:10]
+                        )
+                        renamed_entry = attr.evolve(entry, name=new_name)
+                        deduplicated_entries.append(renamed_entry)
+
+        # Finally, return the "fixed" directory
+        dir_ = Directory(
+            entries=tuple(deduplicated_entries), id=id, raw_manifest=raw_manifest
+        )
+        return (True, dir_)
+
 
 @attr.s(frozen=True, slots=True)
 class BaseContent(BaseModel):
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
index 590e4b4a2e59ec0c51f3766e10d64aabbdaff943..4540c433db2b00630a8a0366e440a5892c74600d 100644
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -13,7 +13,7 @@ import attr
 from attrs_strict import AttributeTypeError
 import dateutil
 from hypothesis import given
-from hypothesis.strategies import binary
+from hypothesis.strategies import binary, none
 import pytest
 
 from swh.model.collections import ImmutableDict
@@ -841,7 +841,7 @@ def test_content_naive_datetime():
         )
 
 
-@given(strategies.present_contents().filter(lambda cnt: cnt.data is not None))
+@given(strategies.present_contents())
 def test_content_git_roundtrip(content):
     assert content.data is not None
     raw = swh.model.git_objects.content_git_object(content)
@@ -886,7 +886,7 @@ def test_skipped_content_naive_datetime():
 # Directory
 
 
-@given(strategies.directories().filter(lambda d: d.raw_manifest is None))
+@given(strategies.directories(raw_manifest=none()))
 def test_directory_check(directory):
     directory.check()
 
@@ -903,7 +903,7 @@ def test_directory_check(directory):
         directory2.check()
 
 
-@given(strategies.directories().filter(lambda d: d.raw_manifest is None))
+@given(strategies.directories(raw_manifest=none()))
 def test_directory_raw_manifest(directory):
     assert "raw_manifest" not in directory.to_dict()
 
@@ -943,10 +943,147 @@ def test_directory_duplicate_entry_name():
         Directory(entries=entries)
 
 
+@given(strategies.directories())
+def test_directory_from_possibly_duplicated_entries__no_duplicates(directory):
+    """
+    Directory.from_possibly_duplicated_entries should return the directory
+    unchanged if it has no duplicated entry name.
+    """
+    assert (False, directory) == Directory.from_possibly_duplicated_entries(
+        id=directory.id, entries=directory.entries, raw_manifest=directory.raw_manifest
+    )
+    assert (False, directory) == Directory.from_possibly_duplicated_entries(
+        entries=directory.entries, raw_manifest=directory.raw_manifest
+    )
+
+
+@pytest.mark.parametrize("rev_first", [True, False])
+def test_directory_from_possibly_duplicated_entries__rev_and_dir(rev_first):
+    entries = (
+        DirectoryEntry(name=b"foo", type="dir", target=b"\x01" * 20, perms=1),
+        DirectoryEntry(name=b"foo", type="rev", target=b"\x00" * 20, perms=0),
+    )
+    if rev_first:
+        entries = tuple(reversed(entries))
+    (is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(entries=entries)
+    assert is_corrupt
+    assert dir_.entries == (
+        DirectoryEntry(name=b"foo", type="rev", target=b"\x00" * 20, perms=0),
+        DirectoryEntry(
+            name=b"foo_0101010101", type="dir", target=b"\x01" * 20, perms=1
+        ),
+    )
+
+    # order is independent of 'rev_first' because it is always sorted in git order
+    assert dir_.raw_manifest == (
+        # fmt: off
+        b"tree 52\x00"
+        + b"0 foo\x00" + b"\x00" * 20
+        + b"1 foo\x00" + b"\x01" * 20
+        # fmt: on
+    )
+
+
+@pytest.mark.parametrize("file_first", [True, False])
+def test_directory_from_possibly_duplicated_entries__file_and_dir(file_first):
+    entries = (
+        DirectoryEntry(name=b"foo", type="dir", target=b"\x01" * 20, perms=1),
+        DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
+    )
+    if file_first:
+        entries = tuple(reversed(entries))
+    (is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(entries=entries)
+    assert is_corrupt
+    assert dir_.entries == (
+        DirectoryEntry(name=b"foo", type="dir", target=b"\x01" * 20, perms=1),
+        DirectoryEntry(
+            name=b"foo_0000000000", type="file", target=b"\x00" * 20, perms=0
+        ),
+    )
+
+    # order is independent of 'file_first' because it is always sorted in git order
+    assert dir_.raw_manifest == (
+        # fmt: off
+        b"tree 52\x00"
+        + b"0 foo\x00" + b"\x00" * 20
+        + b"1 foo\x00" + b"\x01" * 20
+        # fmt: on
+    )
+
+
+def test_directory_from_possibly_duplicated_entries__two_files1():
+    entries = (
+        DirectoryEntry(name=b"foo", type="file", target=b"\x01" * 20, perms=1),
+        DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
+    )
+    (is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(entries=entries)
+    assert is_corrupt
+
+    assert dir_.entries == (
+        DirectoryEntry(name=b"foo", type="file", target=b"\x01" * 20, perms=1),
+        DirectoryEntry(
+            name=b"foo_0000000000", type="file", target=b"\x00" * 20, perms=0
+        ),
+    )
+    assert dir_.raw_manifest == (
+        # fmt: off
+        b"tree 52\x00"
+        + b"1 foo\x00" + b"\x01" * 20
+        + b"0 foo\x00" + b"\x00" * 20
+        # fmt: on
+    )
+
+
+def test_directory_from_possibly_duplicated_entries__two_files2():
+    """
+    Same as above, but entries are in a different order (and order matters
+    to break the tie)
+    """
+    entries = (
+        DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
+        DirectoryEntry(name=b"foo", type="file", target=b"\x01" * 20, perms=1),
+    )
+    (is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(entries=entries)
+    assert is_corrupt
+
+    assert dir_.entries == (
+        DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
+        DirectoryEntry(
+            name=b"foo_0101010101", type="file", target=b"\x01" * 20, perms=1
+        ),
+    )
+    assert dir_.raw_manifest == (
+        # fmt: off
+        b"tree 52\x00"
+        + b"0 foo\x00" + b"\x00" * 20
+        + b"1 foo\x00" + b"\x01" * 20
+        # fmt: on
+    )
+
+
+def test_directory_from_possibly_duplicated_entries__preserve_manifest():
+    entries = (
+        DirectoryEntry(name=b"foo", type="dir", target=b"\x01" * 20, perms=1),
+        DirectoryEntry(name=b"foo", type="rev", target=b"\x00" * 20, perms=0),
+    )
+    (is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(
+        entries=entries, raw_manifest=b"blah"
+    )
+    assert is_corrupt
+    assert dir_.entries == (
+        DirectoryEntry(name=b"foo", type="rev", target=b"\x00" * 20, perms=0),
+        DirectoryEntry(
+            name=b"foo_0101010101", type="dir", target=b"\x01" * 20, perms=1
+        ),
+    )
+
+    assert dir_.raw_manifest == b"blah"
+
+
 # Release
 
 
-@given(strategies.releases().filter(lambda rel: rel.raw_manifest is None))
+@given(strategies.releases(raw_manifest=none()))
 def test_release_check(release):
     release.check()
 
@@ -963,7 +1100,7 @@ def test_release_check(release):
         release2.check()
 
 
-@given(strategies.releases().filter(lambda rev: rev.raw_manifest is None))
+@given(strategies.releases(raw_manifest=none()))
 def test_release_raw_manifest(release):
     raw_manifest = b"foo"
     id_ = hashlib.new("sha1", raw_manifest).digest()
@@ -983,7 +1120,7 @@ def test_release_raw_manifest(release):
 # Revision
 
 
-@given(strategies.revisions().filter(lambda rev: rev.raw_manifest is None))
+@given(strategies.revisions(raw_manifest=none()))
 def test_revision_check(revision):
     revision.check()
 
@@ -1000,7 +1137,7 @@ def test_revision_check(revision):
         revision2.check()
 
 
-@given(strategies.revisions().filter(lambda rev: rev.raw_manifest is None))
+@given(strategies.revisions(raw_manifest=none()))
 def test_revision_raw_manifest(revision):
 
     raw_manifest = b"foo"