From f56becc196ed6dd4b211c97096654c4400b047ec Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 23 Sep 2021 16:52:22 +0200
Subject: [PATCH] Deprecate identifiers.py

1. Add a warning
2. Move identifier/manifest documentation to git_objects.py
3. Remove all imports of that module.

Motivation:

* SWHID classes were moved to swhids.py
* manifest computation functions were moved to git_objects.py
* Only reexports and trivial wrappers of model.py remain
---
 swh/model/cli.py                    |  23 +--
 swh/model/from_disk.py              |  16 +-
 swh/model/git_objects.py            | 200 ++++++++++++++++++-
 swh/model/hypothesis_strategies.py  |   7 +-
 swh/model/identifiers.py            | 270 ++++----------------------
 swh/model/tests/swh_model_data.py   |   2 +-
 swh/model/tests/test_identifiers.py | 288 +++++++++++++++-------------
 swh/model/tests/test_model.py       | 105 +---------
 8 files changed, 403 insertions(+), 508 deletions(-)

diff --git a/swh/model/cli.py b/swh/model/cli.py
index e547aeb6..ede67e22 100644
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -26,7 +26,7 @@ except ImportError:
     swh_cli_group = click  # type: ignore
 
 from swh.model.from_disk import Directory
-from swh.model.identifiers import CoreSWHID, ObjectType
+from swh.model.swhids import CoreSWHID
 
 CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
 
@@ -42,7 +42,7 @@ _DULWICH_TYPES = {
 
 class CoreSWHIDParamType(click.ParamType):
     """Click argument that accepts a core SWHID and returns them as
-    :class:`swh.model.identifiers.CoreSWHID` instances """
+    :class:`swh.model.swhids.CoreSWHID` instances """
 
     name = "SWHID"
 
@@ -87,17 +87,9 @@ def swhid_of_dir(path: bytes, exclude_patterns: Iterable[bytes] = None) -> CoreS
 
 
 def swhid_of_origin(url):
-    from swh.model.hashutil import hash_to_bytes
-    from swh.model.identifiers import (
-        ExtendedObjectType,
-        ExtendedSWHID,
-        origin_identifier,
-    )
+    from swh.model.model import Origin
 
-    return ExtendedSWHID(
-        object_type=ExtendedObjectType.ORIGIN,
-        object_id=hash_to_bytes(origin_identifier({"url": url})),
-    )
+    return Origin(url).swhid()
 
 
 def swhid_of_git_repo(path) -> CoreSWHID:
@@ -110,7 +102,7 @@ def swhid_of_git_repo(path) -> CoreSWHID:
         )
 
     from swh.model import hashutil
-    from swh.model.identifiers import snapshot_identifier
+    from swh.model.model import Snapshot
 
     repo = dulwich.repo.Repo(path)
 
@@ -133,10 +125,7 @@ def swhid_of_git_repo(path) -> CoreSWHID:
 
     snapshot = {"branches": branches}
 
-    return CoreSWHID(
-        object_type=ObjectType.SNAPSHOT,
-        object_id=hashutil.hash_to_bytes(snapshot_identifier(snapshot)),
-    )
+    return Snapshot.from_dict(snapshot).swhid()
 
 
 def identify_object(
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index 8fdd55ef..2ae893f1 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -18,14 +18,10 @@ from typing_extensions import Final
 
 from . import model
 from .exceptions import InvalidDirectoryPath
-from .hashutil import MultiHash, hash_to_bytes, hash_to_hex
-from .identifiers import (
-    CoreSWHID,
-    ObjectType,
-    directory_entry_sort_key,
-    directory_identifier,
-)
+from .git_objects import directory_entry_sort_key
+from .hashutil import MultiHash, hash_to_hex
 from .merkle import MerkleLeaf, MerkleNode
+from .swhids import CoreSWHID, ObjectType
 
 
 @attr.s(frozen=True, slots=True)
@@ -477,8 +473,8 @@ class Directory(MerkleNode):
 
     @property
     def entries(self):
-        """Child nodes, sorted by name in the same way `directory_identifier`
-        does."""
+        """Child nodes, sorted by name in the same way
+        :func:`swh.model.git_objects.directory_git_object` does."""
         if self.__entries is None:
             self.__entries = sorted(
                 (
@@ -496,7 +492,7 @@ class Directory(MerkleNode):
         return CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=self.hash)
 
     def compute_hash(self):
-        return hash_to_bytes(directory_identifier({"entries": self.entries}))
+        return model.Directory.from_dict({"entries": self.entries}).id
 
     def to_model(self) -> model.Directory:
         """Builds a `model.Directory` object based on this node;
diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py
index 16e69e76..1fb668ab 100644
--- a/swh/model/git_objects.py
+++ b/swh/model/git_objects.py
@@ -27,7 +27,7 @@ def directory_entry_sort_key(entry: model.DirectoryEntry):
 
 @lru_cache()
 def _perms_to_bytes(perms):
-    """Convert the perms value to its bytes representation"""
+    """Convert the perms value to its canonical bytes representation"""
     oc = oct(perms)[2:]
     return oc.encode("ascii")
 
@@ -117,7 +117,6 @@ def normalize_timestamp(time_representation):
               UTC
             - negative_utc: a boolean representing whether the offset is -0000
               when offset = 0.
-
     """
     if time_representation is None:
         return None
@@ -126,6 +125,41 @@ def normalize_timestamp(time_representation):
 
 
 def directory_git_object(directory: model.Directory) -> bytes:
+    """Formats a directory as a git tree.
+
+    A directory's identifier is the tree sha1 à la git of a directory listing,
+    using the following algorithm, which is equivalent to the git algorithm for
+    trees:
+
+    1. Entries of the directory are sorted using the name (or the name with '/'
+       appended for directory entries) as key, in bytes order.
+
+    2. For each entry of the directory, the following bytes are output:
+
+      - the octal representation of the permissions for the entry (stored in
+        the 'perms' member), which is a representation of the entry type:
+
+        - b'100644' (int 33188) for files
+        - b'100755' (int 33261) for executable files
+        - b'120000' (int 40960) for symbolic links
+        - b'40000'  (int 16384) for directories
+        - b'160000' (int 57344) for references to revisions
+
+      - an ascii space (b'\x20')
+      - the entry's name (as raw bytes), stored in the 'name' member
+      - a null byte (b'\x00')
+      - the 20 byte long identifier of the object pointed at by the entry,
+        stored in the 'target' member:
+
+        - for files or executable files: their blob sha1_git
+        - for symbolic links: the blob sha1_git of a file containing the link
+          destination
+        - for directories: their intrinsic identifier
+        - for revisions: their intrinsic identifier
+
+      (Note that there is no separator between entries)
+
+    """
     if isinstance(directory, dict):
         # For backward compatibility
         directory = model.Directory.from_dict(directory)
@@ -219,7 +253,6 @@ def format_author_data(
 
     Returns:
         the byte string containing the authorship data
-
     """
 
     ret = [author.fullname]
@@ -234,8 +267,55 @@ def format_author_data(
 
 
 def revision_git_object(revision: model.Revision) -> bytes:
-    """Formats the git_object of a revision. See :func:`revision_identifier` for details
-    on the format."""
+    """Formats a revision as a git commit.
+
+    The fields used for the revision identifier computation are:
+
+    - directory
+    - parents
+    - author
+    - author_date
+    - committer
+    - committer_date
+    - extra_headers or metadata -> extra_headers
+    - message
+
+    A revision's identifier is the 'git'-checksum of a commit manifest
+    constructed as follows (newlines are a single ASCII newline character)::
+
+        tree <directory identifier>
+        [for each parent in parents]
+        parent <parent identifier>
+        [end for each parents]
+        author <author> <author_date>
+        committer <committer> <committer_date>
+        [for each key, value in extra_headers]
+        <key> <encoded value>
+        [end for each extra_headers]
+
+        <message>
+
+    The directory identifier is the ascii representation of its hexadecimal
+    encoding.
+
+    Author and committer are formatted using the :attr:`Person.fullname` attribute only.
+    Dates are formatted with the :func:`format_offset` function.
+
+    Extra headers are an ordered list of [key, value] pairs. Keys are strings
+    and get encoded to utf-8 for identifier computation. Values are either byte
+    strings, unicode strings (that get encoded to utf-8), or integers (that get
+    encoded to their utf-8 decimal representation).
+
+    Multiline extra header values are escaped by indenting the continuation
+    lines with one ascii space.
+
+    If the message is None, the manifest ends with the last header. Else, the
+    message is appended to the headers after an empty line.
+
+    The checksum of the full manifest is computed using the 'commit' git object
+    type.
+
+    """
     if isinstance(revision, dict):
         # For backward compatibility
         revision = model.Revision.from_dict(revision)
@@ -290,8 +370,50 @@ def release_git_object(release: model.Release) -> bytes:
 
 
 def snapshot_git_object(snapshot: model.Snapshot) -> bytes:
-    """Formats the git_object of a revision. See :func:`snapshot_identifier` for details
-    on the format."""
+    """Formats a snapshot as a git-like object.
+
+    Snapshots are a set of named branches, which are pointers to objects at any
+    level of the Software Heritage DAG.
+
+    As well as pointing to other objects in the Software Heritage DAG, branches
+    can also be *alias*es, in which case their target is the name of another
+    branch in the same snapshot, or *dangling*, in which case the target is
+    unknown (and represented by the ``None`` value).
+
+    A snapshot identifier is a salted sha1 (using the git hashing algorithm
+    with the ``snapshot`` object type) of a manifest following the algorithm:
+
+    1. Branches are sorted using the name as key, in bytes order.
+
+    2. For each branch, the following bytes are output:
+
+      - the type of the branch target:
+
+        - ``content``, ``directory``, ``revision``, ``release`` or ``snapshot``
+          for the corresponding entries in the DAG;
+        - ``alias`` for branches referencing another branch;
+        - ``dangling`` for dangling branches
+
+      - an ascii space (``\\x20``)
+      - the branch name (as raw bytes)
+      - a null byte (``\\x00``)
+      - the length of the target identifier, as an ascii-encoded decimal number
+        (``20`` for current intrinsic identifiers, ``0`` for dangling
+        branches, the length of the target branch name for branch aliases)
+      - a colon (``:``)
+      - the identifier of the target object pointed at by the branch,
+        stored in the 'target' member:
+
+        - for contents: their *sha1_git*
+        - for directories, revisions, releases or snapshots: their intrinsic
+          identifier
+        - for branch aliases, the name of the target branch (as raw bytes)
+        - for dangling branches, the empty string
+
+      Note that, akin to directory manifests, there is no separator between
+      entries. Because of symbolic branches, identifiers are of arbitrary
+      length but are length-encoded to avoid ambiguity.
+    """
     if isinstance(snapshot, dict):
         # For backward compatibility
         snapshot = model.Snapshot.from_dict(snapshot)
@@ -334,9 +456,47 @@ def snapshot_git_object(snapshot: model.Snapshot) -> bytes:
 
 
 def raw_extrinsic_metadata_git_object(metadata: model.RawExtrinsicMetadata) -> bytes:
-    """Formats the git_object of a raw_extrinsic_metadata object.
-    See :func:`raw_extrinsic_metadata_identifier` for details
-    on the format."""
+    """Formats RawExtrinsicMetadata as a git-like object.
+
+    A raw_extrinsic_metadata identifier is a salted sha1 (using the git
+    hashing algorithm with the ``raw_extrinsic_metadata`` object type) of
+    a manifest following the format::
+
+        target $ExtendedSwhid
+        discovery_date $Timestamp
+        authority $StrWithoutSpaces $IRI
+        fetcher $Str $Version
+        format $StrWithoutSpaces
+        origin $IRI                         <- optional
+        visit $IntInDecimal                 <- optional
+        snapshot $CoreSwhid                 <- optional
+        release $CoreSwhid                  <- optional
+        revision $CoreSwhid                 <- optional
+        path $Bytes                         <- optional
+        directory $CoreSwhid                <- optional
+
+        $MetadataBytes
+
+    $IRI must be RFC 3987 IRIs (so they may contain newlines, that are escaped as
+    described below)
+
+    $StrWithoutSpaces and $Version are ASCII strings, and may not contain spaces.
+
+    $Str is a UTF-8 string.
+
+    $CoreSwhid are core SWHIDs, as defined in :ref:`persistent-identifiers`.
+    $ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
+    origins and 'emd' for raw extrinsic metadata)
+
+    $Timestamp is a decimal representation of the rounded-down integer number of
+    seconds since the UNIX epoch (1970-01-01 00:00:00 UTC),
+    with no leading '0' (unless the timestamp value is zero) and no timezone.
+    It may be negative by prefixing it with a '-', which must not be followed
+    by a '0'.
+
+    Newlines in $Bytes, $Str, and $IRI are escaped as with other git fields,
+    ie. by adding a space after them.
+    """
     if isinstance(metadata, dict):
         # For backward compatibility
         metadata = model.RawExtrinsicMetadata.from_dict(metadata)
@@ -389,6 +549,26 @@ def raw_extrinsic_metadata_git_object(metadata: model.RawExtrinsicMetadata) -> b
 
 
 def extid_git_object(extid: model.ExtID) -> bytes:
+    """Formats an extid as a git-like object.
+
+    An ExtID identifier is a salted sha1 (using the git hashing algorithm with
+    the ``extid`` object type) of a manifest following the format:
+
+    ```
+    extid_type $StrWithoutSpaces
+    [extid_version $Str]
+    extid $Bytes
+    target $CoreSwhid
+    ```
+
+    $StrWithoutSpaces is an ASCII string, and may not contain spaces.
+
+    Newlines in $Bytes are escaped as with other git fields, ie. by adding a
+    space after them.
+
+    The extid_version line is only generated if the version is non-zero.
+    """
+
     headers = [
         (b"extid_type", extid.extid_type.encode("ascii")),
     ]
diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py
index 67da0e0b..c8644a39 100644
--- a/swh/model/hypothesis_strategies.py
+++ b/swh/model/hypothesis_strategies.py
@@ -29,8 +29,6 @@ from hypothesis.strategies import (
 )
 
 from .from_disk import DentryPerms
-from .hashutil import hash_to_bytes
-from .identifiers import ExtendedObjectType, ExtendedSWHID, snapshot_identifier
 from .model import (
     BaseContent,
     Content,
@@ -54,6 +52,7 @@ from .model import (
     Timestamp,
     TimestampWithTimezone,
 )
+from .swhids import ExtendedObjectType, ExtendedSWHID
 
 pgsql_alphabet = characters(
     blacklist_categories=("Cs",), blacklist_characters=["\u0000"]
@@ -396,7 +395,7 @@ def snapshots_d(draw, *, min_size=0, max_size=100, only_objects=False):
     # Ensure no cycles between aliases
     while True:
         try:
-            id_ = snapshot_identifier(
+            snapshot = Snapshot.from_dict(
                 {
                     "branches": {
                         name: branch or None for (name, branch) in branches.items()
@@ -409,7 +408,7 @@ def snapshots_d(draw, *, min_size=0, max_size=100, only_objects=False):
         else:
             break
 
-    return dict(id=hash_to_bytes(id_), branches=branches)
+    return snapshot.to_dict()
 
 
 def snapshots(*, min_size=0, max_size=100, only_objects=False):
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index 194bc258..6fa63666 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -3,9 +3,8 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from __future__ import annotations
-
 from typing import Any, Dict
+import warnings
 
 from . import model
 
@@ -16,6 +15,14 @@ from .hashutil import MultiHash, hash_to_hex
 # Reexport for backward compatibility
 from .swhids import *  # noqa
 
+warnings.warn(
+    "The swh.model.identifiers module is deprecated. "
+    "SWHID-related classes were moved to swh.model.swhids, and identifier "
+    "computation is now done directly with swh.model.model classes.",
+    DeprecationWarning,
+    stacklevel=2,
+)
+
 # The following are deprecated aliases of the variants defined in ObjectType
 # while transitioning from SWHID to QualifiedSWHID
 ORIGIN = "origin"
@@ -28,260 +35,57 @@ RAW_EXTRINSIC_METADATA = "raw_extrinsic_metadata"
 
 
 def content_identifier(content: Dict[str, Any]) -> Dict[str, bytes]:
-    """Return the intrinsic identifier for a content.
-
-    A content's identifier is the sha1, sha1_git and sha256 checksums of its
-    data.
-
-    Args:
-        content: a content conforming to the Software Heritage schema
-
-    Returns:
-        A dictionary with all the hashes for the data
-
-    Raises:
-        KeyError: if the content doesn't have a data member.
-
+    """Deprecated, use :class:`swh.model.model.Content` instead:
+    ``content_identifier(d)`` is equivalent to:
+    ``Content.from_data(d["data"]).hashes()``
     """
-
     return MultiHash.from_data(content["data"]).digest()
 
 
 def directory_identifier(directory: Dict[str, Any]) -> str:
-    """Return the intrinsic identifier for a directory.
-
-    A directory's identifier is the tree sha1 à la git of a directory listing,
-    using the following algorithm, which is equivalent to the git algorithm for
-    trees:
+    """Deprecated, use :class:`swh.model.model.Directory` instead:
+    ``directory_identifier(d)`` is equivalent to:
+    ``hash_to_hex(Directory.from_dict(d).id)``.
 
-    1. Entries of the directory are sorted using the name (or the name with '/'
-       appended for directory entries) as key, in bytes order.
-
-    2. For each entry of the directory, the following bytes are output:
-
-      - the octal representation of the permissions for the entry (stored in
-        the 'perms' member), which is a representation of the entry type:
-
-        - b'100644' (int 33188) for files
-        - b'100755' (int 33261) for executable files
-        - b'120000' (int 40960) for symbolic links
-        - b'40000'  (int 16384) for directories
-        - b'160000' (int 57344) for references to revisions
-
-      - an ascii space (b'\x20')
-      - the entry's name (as raw bytes), stored in the 'name' member
-      - a null byte (b'\x00')
-      - the 20 byte long identifier of the object pointed at by the entry,
-        stored in the 'target' member:
-
-        - for files or executable files: their blob sha1_git
-        - for symbolic links: the blob sha1_git of a file containing the link
-          destination
-        - for directories: their intrinsic identifier
-        - for revisions: their intrinsic identifier
-
-      (Note that there is no separator between entries)
-
-    """
+    See :func:`swh.model.git_objects.directory_git_object` for details of the
+    format used to generate this identifier."""
     return hash_to_hex(model.Directory.from_dict(directory).id)
 
 
 def revision_identifier(revision: Dict[str, Any]) -> str:
-    """Return the intrinsic identifier for a revision.
-
-    The fields used for the revision identifier computation are:
+    """Deprecated, use :class:`swh.model.model.Revision` instead:
+    ``revision_identifier(d)`` is equivalent to:
+    ``hash_to_hex(Revision.from_dict(d).id)``.
 
-    - directory
-    - parents
-    - author
-    - author_date
-    - committer
-    - committer_date
-    - extra_headers or metadata -> extra_headers
-    - message
-
-    A revision's identifier is the 'git'-checksum of a commit manifest
-    constructed as follows (newlines are a single ASCII newline character)::
-
-        tree <directory identifier>
-        [for each parent in parents]
-        parent <parent identifier>
-        [end for each parents]
-        author <author> <author_date>
-        committer <committer> <committer_date>
-        [for each key, value in extra_headers]
-        <key> <encoded value>
-        [end for each extra_headers]
-
-        <message>
-
-    The directory identifier is the ascii representation of its hexadecimal
-    encoding.
-
-    Author and committer are formatted using the :attr:`Person.fullname` attribute only.
-    Dates are formatted with the :func:`format_offset` function.
-
-    Extra headers are an ordered list of [key, value] pairs. Keys are strings
-    and get encoded to utf-8 for identifier computation. Values are either byte
-    strings, unicode strings (that get encoded to utf-8), or integers (that get
-    encoded to their utf-8 decimal representation).
-
-    Multiline extra header values are escaped by indenting the continuation
-    lines with one ascii space.
-
-    If the message is None, the manifest ends with the last header. Else, the
-    message is appended to the headers after an empty line.
-
-    The checksum of the full manifest is computed using the 'commit' git object
-    type.
-
-    """
+    See :func:`swh.model.git_objects.revision_git_object` for details of the
+    format used to generate this identifier."""
     return hash_to_hex(model.Revision.from_dict(revision).id)
 
 
 def release_identifier(release: Dict[str, Any]) -> str:
-    """Return the intrinsic identifier for a release."""
+    """Deprecated, use :class:`swh.model.model.Release` instead:
+    ``release_identifier(d)`` is equivalent to:
+    ``hash_to_hex(Release.from_dict(d).id)``.
+
+    See :func:`swh.model.git_objects.release_git_object` for details of the
+    format used to generate this identifier."""
     return hash_to_hex(model.Release.from_dict(release).id)
 
 
 def snapshot_identifier(snapshot: Dict[str, Any]) -> str:
-    """Return the intrinsic identifier for a snapshot.
-
-    Snapshots are a set of named branches, which are pointers to objects at any
-    level of the Software Heritage DAG.
-
-    As well as pointing to other objects in the Software Heritage DAG, branches
-    can also be *alias*es, in which case their target is the name of another
-    branch in the same snapshot, or *dangling*, in which case the target is
-    unknown (and represented by the ``None`` value).
-
-    A snapshot identifier is a salted sha1 (using the git hashing algorithm
-    with the ``snapshot`` object type) of a manifest following the algorithm:
-
-    1. Branches are sorted using the name as key, in bytes order.
-
-    2. For each branch, the following bytes are output:
-
-      - the type of the branch target:
+    """Deprecated, use :class:`swh.model.model.Snapshot` instead:
+    ``snapshot_identifier(d)`` is equivalent to:
+    ``hash_to_hex(Snapshot.from_dict(d).id)``.
 
-        - ``content``, ``directory``, ``revision``, ``release`` or ``snapshot``
-          for the corresponding entries in the DAG;
-        - ``alias`` for branches referencing another branch;
-        - ``dangling`` for dangling branches
-
-      - an ascii space (``\\x20``)
-      - the branch name (as raw bytes)
-      - a null byte (``\\x00``)
-      - the length of the target identifier, as an ascii-encoded decimal number
-        (``20`` for current intrinsic identifiers, ``0`` for dangling
-        branches, the length of the target branch name for branch aliases)
-      - a colon (``:``)
-      - the identifier of the target object pointed at by the branch,
-        stored in the 'target' member:
-
-        - for contents: their *sha1_git*
-        - for directories, revisions, releases or snapshots: their intrinsic
-          identifier
-        - for branch aliases, the name of the target branch (as raw bytes)
-        - for dangling branches, the empty string
-
-      Note that, akin to directory manifests, there is no separator between
-      entries. Because of symbolic branches, identifiers are of arbitrary
-      length but are length-encoded to avoid ambiguity.
-
-    Args:
-      snapshot (dict): the snapshot of which to compute the identifier. A
-        single entry is needed, ``'branches'``, which is itself a :class:`dict`
-        mapping each branch to its target
-
-    Returns:
-      str: the intrinsic identifier for `snapshot`
-
-    """
+    See :func:`swh.model.git_objects.snapshot_git_object` for details of the
+    format used to generate this identifier."""
     return hash_to_hex(model.Snapshot.from_dict(snapshot).id)
 
 
 def origin_identifier(origin):
-    """Return the intrinsic identifier for an origin.
-
-    An origin's identifier is the sha1 checksum of the entire origin URL
-
+    """Deprecated, use :class:`swh.model.model.Origin` instead:
+    ``origin_identifier(d)`` is equivalent to:
+    ``hash_to_hex(Origin.from_dict(d).id)``.
     """
-    return hash_to_hex(model.Origin.from_dict(origin).id)
-
-
-def raw_extrinsic_metadata_identifier(metadata: Dict[str, Any]) -> str:
-    """Return the intrinsic identifier for a RawExtrinsicMetadata object.
-
-    A raw_extrinsic_metadata identifier is a salted sha1 (using the git
-    hashing algorithm with the ``raw_extrinsic_metadata`` object type) of
-    a manifest following the format::
-
-        target $ExtendedSwhid
-        discovery_date $Timestamp
-        authority $StrWithoutSpaces $IRI
-        fetcher $Str $Version
-        format $StrWithoutSpaces
-        origin $IRI                         <- optional
-        visit $IntInDecimal                 <- optional
-        snapshot $CoreSwhid                 <- optional
-        release $CoreSwhid                  <- optional
-        revision $CoreSwhid                 <- optional
-        path $Bytes                         <- optional
-        directory $CoreSwhid                <- optional
-
-        $MetadataBytes
-
-    $IRI must be RFC 3987 IRIs (so they may contain newlines, that are escaped as
-    described below)
-
-    $StrWithoutSpaces and $Version are ASCII strings, and may not contain spaces.
-
-    $Str is an UTF-8 string.
 
-    $CoreSwhid are core SWHIDs, as defined in :ref:`persistent-identifiers`.
-    $ExtendedSwhid is a core SWHID, with extra types allowed ('ori' for
-    origins and 'emd' for raw extrinsic metadata)
-
-    $Timestamp is a decimal representation of the rounded-down integer number of
-    seconds since the UNIX epoch (1970-01-01 00:00:00 UTC),
-    with no leading '0' (unless the timestamp value is zero) and no timezone.
-    It may be negative by prefixing it with a '-', which must not be followed
-    by a '0'.
-
-    Newlines in $Bytes, $Str, and $Iri are escaped as with other git fields,
-    ie. by adding a space after them.
-
-    Returns:
-      str: the intrinsic identifier for ``metadata``
-
-    """
-    return hash_to_hex(model.RawExtrinsicMetadata.from_dict(metadata).id)
-
-
-def extid_identifier(extid: Dict[str, Any]) -> str:
-    """Return the intrinsic identifier for an ExtID object.
-
-    An ExtID identifier is a salted sha1 (using the git hashing algorithm with
-    the ``extid`` object type) of a manifest following the format:
-
-    ```
-    extid_type $StrWithoutSpaces
-    [extid_version $Str]
-    extid $Bytes
-    target $CoreSwhid
-    ```
-
-    $StrWithoutSpaces is an ASCII string, and may not contain spaces.
-
-    Newlines in $Bytes are escaped as with other git fields, ie. by adding a
-    space after them.
-
-    The extid_version line is only generated if the version is non-zero.
-
-    Returns:
-      str: the intrinsic identifier for `extid`
-
-    """
-
-    return hash_to_hex(model.ExtID.from_dict(extid).id)
+    return hash_to_hex(model.Origin.from_dict(origin).id)
diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
index 1f5dded9..d920c1e7 100644
--- a/swh/model/tests/swh_model_data.py
+++ b/swh/model/tests/swh_model_data.py
@@ -9,7 +9,6 @@ from typing import Dict, Sequence
 import attr
 
 from swh.model.hashutil import MultiHash, hash_to_bytes
-from swh.model.identifiers import ExtendedSWHID
 from swh.model.model import (
     BaseModel,
     Content,
@@ -35,6 +34,7 @@ from swh.model.model import (
     Timestamp,
     TimestampWithTimezone,
 )
+from swh.model.swhids import ExtendedSWHID
 
 UTC = datetime.timezone.utc
 
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index c065b0df..f5da0a93 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -12,18 +12,27 @@ import unittest
 import attr
 import pytest
 
-from swh.model import hashutil, identifiers
+from swh.model import git_objects, hashutil
 from swh.model.exceptions import ValidationError
 from swh.model.hashutil import hash_to_bytes as _x
-from swh.model.hashutil import hash_to_hex
-from swh.model.identifiers import (
+from swh.model.model import (
+    Content,
+    Directory,
+    ExtID,
+    Origin,
+    RawExtrinsicMetadata,
+    Release,
+    Revision,
+    Snapshot,
+    TimestampWithTimezone,
+)
+from swh.model.swhids import (
     SWHID_QUALIFIERS,
     CoreSWHID,
     ExtendedObjectType,
     ExtendedSWHID,
     ObjectType,
     QualifiedSWHID,
-    normalize_timestamp,
 )
 
 
@@ -51,11 +60,11 @@ class UtilityFunctionsDateOffset(unittest.TestCase):
 
     def test_format_date(self):
         for date_repr, date in self.dates.items():
-            self.assertEqual(identifiers.format_date(date), date_repr)
+            self.assertEqual(git_objects.format_date(date), date_repr)
 
     def test_format_offset(self):
         for offset, res in self.offsets.items():
-            self.assertEqual(identifiers.format_offset(offset), res)
+            self.assertEqual(git_objects.format_offset(offset), res)
 
 
 content_example = {
@@ -72,7 +81,7 @@ class ContentIdentifier(unittest.TestCase):
 
     def test_content_identifier(self):
         self.assertEqual(
-            identifiers.content_identifier(content_example), self.content_id
+            Content.from_data(content_example["data"]).hashes(), self.content_id
         )
 
 
@@ -199,26 +208,22 @@ class DirectoryIdentifier(unittest.TestCase):
         }
 
     def test_dir_identifier(self):
+        self.assertEqual(Directory.from_dict(self.directory).id, self.directory["id"])
         self.assertEqual(
-            _x(identifiers.directory_identifier(self.directory)), self.directory["id"]
-        )
-        self.assertEqual(
-            _x(identifiers.directory_identifier(remove_id(self.directory))),
-            self.directory["id"],
+            Directory.from_dict(remove_id(self.directory)).id, self.directory["id"],
         )
 
     def test_dir_identifier_entry_order(self):
         # Reverse order of entries, check the id is still the same.
         directory = {"entries": reversed(self.directory["entries"])}
         self.assertEqual(
-            _x(identifiers.directory_identifier(remove_id(directory))),
-            self.directory["id"],
+            Directory.from_dict(remove_id(directory)).id, self.directory["id"],
         )
 
     def test_dir_identifier_empty_directory(self):
         self.assertEqual(
-            identifiers.directory_identifier(remove_id(self.empty_directory)),
-            self.empty_directory["id"],
+            Directory.from_dict(remove_id(self.empty_directory)).id,
+            _x(self.empty_directory["id"]),
         )
 
 
@@ -270,7 +275,7 @@ dg1KdHOa34shrKDaOVzW
         self.revision = revision_example
 
         self.revision_none_metadata = {
-            "id": "bc0195aad0daa2ad5b0d76cce22b167bc3435590",
+            "id": _x("bc0195aad0daa2ad5b0d76cce22b167bc3435590"),
             "directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
             "parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
             "author": {
@@ -328,7 +333,7 @@ dg1KdHOa34shrKDaOVzW
 
         # cat commit.txt | git hash-object -t commit --stdin
         self.revision_with_extra_headers = {
-            "id": "010d34f384fa99d047cdd5e2f41e56e5c2feee45",
+            "id": _x("010d34f384fa99d047cdd5e2f41e56e5c2feee45"),
             "directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
             "parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
             "author": {
@@ -355,7 +360,7 @@ dg1KdHOa34shrKDaOVzW
         }
 
         self.revision_with_gpgsig = {
-            "id": "44cc742a8ca17b9c279be4cc195a93a6ef7a320e",
+            "id": _x("44cc742a8ca17b9c279be4cc195a93a6ef7a320e"),
             "directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
             "parents": [
                 _x("689664ae944b4692724f13b709a4e4de28b54e57"),
@@ -380,7 +385,7 @@ dg1KdHOa34shrKDaOVzW
         }
 
         self.revision_no_message = {
-            "id": "4cfc623c9238fa92c832beed000ce2d003fd8333",
+            "id": _x("4cfc623c9238fa92c832beed000ce2d003fd8333"),
             "directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
             "parents": [
                 _x("689664ae944b4692724f13b709a4e4de28b54e57"),
@@ -400,7 +405,7 @@ dg1KdHOa34shrKDaOVzW
         }
 
         self.revision_empty_message = {
-            "id": "7442cd78bd3b4966921d6a7f7447417b7acb15eb",
+            "id": _x("7442cd78bd3b4966921d6a7f7447417b7acb15eb"),
             "directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
             "parents": [
                 _x("689664ae944b4692724f13b709a4e4de28b54e57"),
@@ -420,7 +425,7 @@ dg1KdHOa34shrKDaOVzW
         }
 
         self.revision_only_fullname = {
-            "id": "010d34f384fa99d047cdd5e2f41e56e5c2feee45",
+            "id": _x("010d34f384fa99d047cdd5e2f41e56e5c2feee45"),
             "directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
             "parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
             "author": {"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",},
@@ -442,56 +447,52 @@ dg1KdHOa34shrKDaOVzW
 
     def test_revision_identifier(self):
         self.assertEqual(
-            identifiers.revision_identifier(self.revision),
-            hash_to_hex(self.revision["id"]),
+            Revision.from_dict(self.revision).id, self.revision["id"],
         )
         self.assertEqual(
-            identifiers.revision_identifier(remove_id(self.revision)),
-            hash_to_hex(self.revision["id"]),
+            Revision.from_dict(remove_id(self.revision)).id, self.revision["id"],
         )
 
     def test_revision_identifier_none_metadata(self):
         self.assertEqual(
-            identifiers.revision_identifier(remove_id(self.revision_none_metadata)),
-            hash_to_hex(self.revision_none_metadata["id"]),
+            Revision.from_dict(remove_id(self.revision_none_metadata)).id,
+            self.revision_none_metadata["id"],
         )
 
     def test_revision_identifier_synthetic(self):
         self.assertEqual(
-            identifiers.revision_identifier(remove_id(self.synthetic_revision)),
-            hash_to_hex(self.synthetic_revision["id"]),
+            Revision.from_dict(remove_id(self.synthetic_revision)).id,
+            self.synthetic_revision["id"],
         )
 
     def test_revision_identifier_with_extra_headers(self):
         self.assertEqual(
-            identifiers.revision_identifier(
-                remove_id(self.revision_with_extra_headers)
-            ),
-            hash_to_hex(self.revision_with_extra_headers["id"]),
+            Revision.from_dict(remove_id(self.revision_with_extra_headers)).id,
+            self.revision_with_extra_headers["id"],
         )
 
     def test_revision_identifier_with_gpgsig(self):
         self.assertEqual(
-            identifiers.revision_identifier(remove_id(self.revision_with_gpgsig)),
-            hash_to_hex(self.revision_with_gpgsig["id"]),
+            Revision.from_dict(remove_id(self.revision_with_gpgsig)).id,
+            self.revision_with_gpgsig["id"],
         )
 
     def test_revision_identifier_no_message(self):
         self.assertEqual(
-            identifiers.revision_identifier(remove_id(self.revision_no_message)),
-            hash_to_hex(self.revision_no_message["id"]),
+            Revision.from_dict(remove_id(self.revision_no_message)).id,
+            self.revision_no_message["id"],
         )
 
     def test_revision_identifier_empty_message(self):
         self.assertEqual(
-            identifiers.revision_identifier(remove_id(self.revision_empty_message)),
-            hash_to_hex(self.revision_empty_message["id"]),
+            Revision.from_dict(remove_id(self.revision_empty_message)).id,
+            self.revision_empty_message["id"],
         )
 
     def test_revision_identifier_only_fullname(self):
         self.assertEqual(
-            identifiers.revision_identifier(remove_id(self.revision_only_fullname)),
-            hash_to_hex(self.revision_only_fullname["id"]),
+            Revision.from_dict(remove_id(self.revision_only_fullname)).id,
+            self.revision_only_fullname["id"],
         )
 
 
@@ -608,48 +609,46 @@ o6X/3T+vm8K3bf3driRr34c=
 
     def test_release_identifier(self):
         self.assertEqual(
-            identifiers.release_identifier(self.release),
-            hash_to_hex(self.release["id"]),
+            Release.from_dict(self.release).id, self.release["id"],
         )
         self.assertEqual(
-            identifiers.release_identifier(remove_id(self.release)),
-            hash_to_hex(self.release["id"]),
+            Release.from_dict(remove_id(self.release)).id, self.release["id"],
         )
 
     def test_release_identifier_no_author(self):
         self.assertEqual(
-            identifiers.release_identifier(remove_id(self.release_no_author)),
-            hash_to_hex(self.release_no_author["id"]),
+            Release.from_dict(remove_id(self.release_no_author)).id,
+            self.release_no_author["id"],
         )
 
     def test_release_identifier_no_message(self):
         self.assertEqual(
-            identifiers.release_identifier(remove_id(self.release_no_message)),
-            hash_to_hex(self.release_no_message["id"]),
+            Release.from_dict(remove_id(self.release_no_message)).id,
+            self.release_no_message["id"],
         )
 
     def test_release_identifier_empty_message(self):
         self.assertEqual(
-            identifiers.release_identifier(remove_id(self.release_empty_message)),
-            hash_to_hex(self.release_empty_message["id"]),
+            Release.from_dict(remove_id(self.release_empty_message)).id,
+            self.release_empty_message["id"],
         )
 
     def test_release_identifier_negative_utc(self):
         self.assertEqual(
-            identifiers.release_identifier(remove_id(self.release_negative_utc)),
-            hash_to_hex(self.release_negative_utc["id"]),
+            Release.from_dict(remove_id(self.release_negative_utc)).id,
+            self.release_negative_utc["id"],
         )
 
     def test_release_identifier_newline_in_author(self):
         self.assertEqual(
-            identifiers.release_identifier(remove_id(self.release_newline_in_author)),
-            hash_to_hex(self.release_newline_in_author["id"]),
+            Release.from_dict(remove_id(self.release_newline_in_author)).id,
+            self.release_newline_in_author["id"],
         )
 
     def test_release_identifier_snapshot_target(self):
         self.assertEqual(
-            identifiers.release_identifier(self.release_snapshot_target),
-            hash_to_hex(self.release_snapshot_target["id"]),
+            Release.from_dict(self.release_snapshot_target).id,
+            self.release_snapshot_target["id"],
         )
 
 
@@ -687,17 +686,17 @@ class SnapshotIdentifier(unittest.TestCase):
         super().setUp()
 
         self.empty = {
-            "id": "1a8893e6a86f444e8be8e7bda6cb34fb1735a00e",
+            "id": _x("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
             "branches": {},
         }
 
         self.dangling_branch = {
-            "id": "c84502e821eb21ed84e9fd3ec40973abc8b32353",
+            "id": _x("c84502e821eb21ed84e9fd3ec40973abc8b32353"),
             "branches": {b"HEAD": None,},
         }
 
         self.unresolved = {
-            "id": "84b4548ea486e4b0a7933fa541ff1503a0afe1e0",
+            "id": _x("84b4548ea486e4b0a7933fa541ff1503a0afe1e0"),
             "branches": {b"foo": {"target": b"bar", "target_type": "alias",},},
         }
 
@@ -705,24 +704,22 @@ class SnapshotIdentifier(unittest.TestCase):
 
     def test_empty_snapshot(self):
         self.assertEqual(
-            identifiers.snapshot_identifier(remove_id(self.empty)),
-            hash_to_hex(self.empty["id"]),
+            Snapshot.from_dict(remove_id(self.empty)).id, self.empty["id"],
         )
 
     def test_dangling_branch(self):
         self.assertEqual(
-            identifiers.snapshot_identifier(remove_id(self.dangling_branch)),
-            hash_to_hex(self.dangling_branch["id"]),
+            Snapshot.from_dict(remove_id(self.dangling_branch)).id,
+            self.dangling_branch["id"],
         )
 
     def test_unresolved(self):
         with self.assertRaisesRegex(ValueError, "b'foo' -> b'bar'"):
-            identifiers.snapshot_identifier(remove_id(self.unresolved))
+            Snapshot.from_dict(remove_id(self.unresolved))
 
     def test_all_types(self):
         self.assertEqual(
-            identifiers.snapshot_identifier(remove_id(self.all_types)),
-            hash_to_hex(self.all_types["id"]),
+            Snapshot.from_dict(remove_id(self.all_types)).id, self.all_types["id"],
         )
 
 
@@ -775,15 +772,18 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
         )
 
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_git_object(self.minimal), git_object,
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(self.minimal)
+            ),
+            git_object,
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(self.minimal),
-            hashlib.sha1(git_object).hexdigest(),
+            RawExtrinsicMetadata.from_dict(self.minimal).id,
+            hashlib.sha1(git_object).digest(),
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(self.minimal),
-            "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+            RawExtrinsicMetadata.from_dict(self.minimal).id,
+            _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
         )
 
     def test_maximal(self):
@@ -806,15 +806,18 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
         )
 
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_git_object(self.maximal), git_object,
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(self.maximal)
+            ),
+            git_object,
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(self.maximal),
-            hashlib.sha1(git_object).hexdigest(),
+            RawExtrinsicMetadata.from_dict(self.maximal).id,
+            hashlib.sha1(git_object).digest(),
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(self.maximal),
-            "f96966e1093d15236a31fde07e47d5b1c9428049",
+            RawExtrinsicMetadata.from_dict(self.maximal).id,
+            _x("f96966e1093d15236a31fde07e47d5b1c9428049"),
         )
 
     def test_nonascii_path(self):
@@ -836,15 +839,18 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
         )
 
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(metadata)
+            ),
+            git_object,
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            hashlib.sha1(git_object).hexdigest(),
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            hashlib.sha1(git_object).digest(),
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            "7cc83fd1912176510c083f5df43f01b09af4b333",
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            _x("7cc83fd1912176510c083f5df43f01b09af4b333"),
         )
 
     def test_timezone_insensitive(self):
@@ -859,16 +865,20 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
         }
 
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_git_object(self.minimal),
-            identifiers.raw_extrinsic_metadata_git_object(metadata),
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(self.minimal)
+            ),
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(metadata)
+            ),
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(self.minimal),
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
+            RawExtrinsicMetadata.from_dict(self.minimal).id,
+            RawExtrinsicMetadata.from_dict(metadata).id,
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
         )
 
     def test_microsecond_insensitive(self):
@@ -882,16 +892,20 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
         }
 
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_git_object(self.minimal),
-            identifiers.raw_extrinsic_metadata_git_object(metadata),
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(self.minimal)
+            ),
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(metadata)
+            ),
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(self.minimal),
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
+            RawExtrinsicMetadata.from_dict(self.minimal).id,
+            RawExtrinsicMetadata.from_dict(metadata).id,
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
         )
 
     def test_noninteger_timezone(self):
@@ -906,16 +920,20 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
         }
 
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_git_object(self.minimal),
-            identifiers.raw_extrinsic_metadata_git_object(metadata),
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(self.minimal)
+            ),
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(metadata)
+            ),
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(self.minimal),
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
+            RawExtrinsicMetadata.from_dict(self.minimal).id,
+            RawExtrinsicMetadata.from_dict(metadata).id,
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            "5c13f20ba336e44549baf3d7b9305b027ec9f43d",
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            _x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
         )
 
     def test_negative_timestamp(self):
@@ -938,15 +956,18 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
         )
 
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(metadata)
+            ),
+            git_object,
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            hashlib.sha1(git_object).hexdigest(),
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            hashlib.sha1(git_object).digest(),
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            "895d0821a2991dd376ddc303424aceb7c68280f9",
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            _x("895d0821a2991dd376ddc303424aceb7c68280f9"),
         )
 
     def test_epoch(self):
@@ -969,15 +990,18 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
         )
 
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(metadata)
+            ),
+            git_object,
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            hashlib.sha1(git_object).hexdigest(),
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            hashlib.sha1(git_object).digest(),
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            "27a53df54ace35ebd910493cdc70b334d6b7cb88",
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            _x("27a53df54ace35ebd910493cdc70b334d6b7cb88"),
         )
 
     def test_negative_epoch(self):
@@ -1000,15 +1024,18 @@ class RawExtrinsicMetadataIdentifier(unittest.TestCase):
         )
 
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_git_object(metadata), git_object,
+            git_objects.raw_extrinsic_metadata_git_object(
+                RawExtrinsicMetadata.from_dict(metadata)
+            ),
+            git_object,
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            hashlib.sha1(git_object).hexdigest(),
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            hashlib.sha1(git_object).digest(),
         )
         self.assertEqual(
-            identifiers.raw_extrinsic_metadata_identifier(metadata),
-            "be7154a8fd49d87f81547ea634d1e2152907d089",
+            RawExtrinsicMetadata.from_dict(metadata).id,
+            _x("be7154a8fd49d87f81547ea634d1e2152907d089"),
         )
 
 
@@ -1020,8 +1047,8 @@ origin_example = {
 class OriginIdentifier(unittest.TestCase):
     def test_content_identifier(self):
         self.assertEqual(
-            identifiers.origin_identifier(origin_example),
-            "b63a575fe3faab7692c9f38fb09d4bb45651bb0f",
+            Origin.from_dict(origin_example).id,
+            _x("b63a575fe3faab7692c9f38fb09d4bb45651bb0f"),
         )
 
 
@@ -1111,7 +1138,7 @@ TS_DICTS = [
 
 @pytest.mark.parametrize("dict_input,expected", TS_DICTS)
 def test_normalize_timestamp_dict(dict_input, expected):
-    assert normalize_timestamp(dict_input) == expected
+    assert TimestampWithTimezone.from_dict(dict_input).to_dict() == expected
 
 
 TS_DICTS_INVALID_TIMESTAMP = [
@@ -1127,7 +1154,7 @@ TS_DICTS_INVALID_TIMESTAMP = [
 @pytest.mark.parametrize("dict_input", TS_DICTS_INVALID_TIMESTAMP)
 def test_normalize_timestamp_dict_invalid_timestamp(dict_input):
     with pytest.raises(ValueError, match="non-integer timestamp"):
-        normalize_timestamp(dict_input)
+        TimestampWithTimezone.from_dict(dict_input)
 
 
 UTC = datetime.timezone.utc
@@ -1152,7 +1179,7 @@ TS_DT_EXPECTED = [1582814359, 4765132799, -11348929020]
 @pytest.mark.parametrize("microsecond", [0, 1, 10, 100, 1000, 999999])
 def test_normalize_timestamp_datetime(date, seconds, tz, offset, microsecond):
     date = date.astimezone(tz).replace(microsecond=microsecond)
-    assert normalize_timestamp(date) == {
+    assert TimestampWithTimezone.from_dict(date).to_dict() == {
         "timestamp": {"seconds": seconds, "microseconds": microsecond},
         "offset": offset,
         "negative_utc": False,
@@ -1776,18 +1803,19 @@ def test_extid_identifier_bwcompat():
         "target": "swh:1:dir:" + "00" * 20,
     }
 
-    assert (
-        identifiers.extid_identifier(extid_dict)
-        == "b9295e1931c31e40a7e3e1e967decd1c89426455"
+    assert ExtID.from_dict(extid_dict).id == _x(
+        "b9295e1931c31e40a7e3e1e967decd1c89426455"
     )
 
-    assert identifiers.extid_identifier(
-        {**extid_dict, "extid_version": 0}
-    ) == identifiers.extid_identifier(extid_dict)
+    assert (
+        ExtID.from_dict({**extid_dict, "extid_version": 0}).id
+        == ExtID.from_dict(extid_dict).id
+    )
 
-    assert identifiers.extid_identifier(
-        {**extid_dict, "extid_version": 1}
-    ) != identifiers.extid_identifier(extid_dict)
+    assert (
+        ExtID.from_dict({**extid_dict, "extid_version": 1}).id
+        != ExtID.from_dict(extid_dict).id
+    )
 
 
 def test_object_types():
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
index caad5e2a..781cfa46 100644
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -12,20 +12,8 @@ from hypothesis import given
 from hypothesis.strategies import binary
 import pytest
 
-from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex
+from swh.model.hashutil import MultiHash, hash_to_bytes
 import swh.model.hypothesis_strategies as strategies
-from swh.model.identifiers import (
-    CoreSWHID,
-    ExtendedSWHID,
-    ObjectType,
-    content_identifier,
-    directory_identifier,
-    origin_identifier,
-    raw_extrinsic_metadata_identifier,
-    release_identifier,
-    revision_identifier,
-    snapshot_identifier,
-)
 from swh.model.model import (
     BaseModel,
     Content,
@@ -46,14 +34,13 @@ from swh.model.model import (
     Timestamp,
     TimestampWithTimezone,
 )
+from swh.model.swhids import CoreSWHID, ExtendedSWHID, ObjectType
 from swh.model.tests.swh_model_data import TEST_OBJECTS
 from swh.model.tests.test_identifiers import (
     TS_DATETIMES,
     TS_TIMEZONES,
-    content_example,
     directory_example,
     metadata_example,
-    origin_example,
     release_example,
     revision_example,
     snapshot_example,
@@ -736,94 +723,6 @@ def test_revision_extra_headers_as_lists_from_dict():
     assert rev_model.extra_headers == extra_headers
 
 
-# ID computation
-
-
-def test_content_model_id_computation():
-    cnt_dict = content_example.copy()
-
-    cnt_id_str = hash_to_hex(content_identifier(cnt_dict)["sha1_git"])
-    cnt_model = Content.from_data(cnt_dict["data"])
-    assert str(cnt_model.swhid()) == "swh:1:cnt:" + cnt_id_str
-
-
-def test_directory_model_id_computation():
-    dir_dict = directory_example.copy()
-    del dir_dict["id"]
-
-    dir_id_str = directory_identifier(dir_dict)
-    dir_id = hash_to_bytes(dir_id_str)
-    dir_model = Directory.from_dict(dir_dict)
-    assert dir_model.id == dir_id
-    assert str(dir_model.swhid()) == "swh:1:dir:" + dir_id_str
-
-
-def test_revision_model_id_computation():
-    rev_dict = revision_example.copy()
-    del rev_dict["id"]
-
-    rev_id_str = revision_identifier(rev_dict)
-    rev_id = hash_to_bytes(rev_id_str)
-    rev_model = Revision.from_dict(rev_dict)
-    assert rev_model.id == rev_id
-    assert str(rev_model.swhid()) == "swh:1:rev:" + rev_id_str
-
-
-def test_revision_model_id_computation_with_no_date():
-    """We can have revision with date to None
-
-    """
-    rev_dict = revision_example.copy()
-    rev_dict["date"] = None
-    rev_dict["committer_date"] = None
-    del rev_dict["id"]
-
-    rev_id = hash_to_bytes(revision_identifier(rev_dict))
-    rev_model = Revision.from_dict(rev_dict)
-    assert rev_model.date is None
-    assert rev_model.committer_date is None
-    assert rev_model.id == rev_id
-
-
-def test_release_model_id_computation():
-    rel_dict = release_example.copy()
-    del rel_dict["id"]
-
-    rel_id_str = release_identifier(rel_dict)
-    rel_id = hash_to_bytes(rel_id_str)
-    rel_model = Release.from_dict(rel_dict)
-    assert isinstance(rel_model.date, TimestampWithTimezone)
-    assert rel_model.id == hash_to_bytes(rel_id)
-    assert str(rel_model.swhid()) == "swh:1:rel:" + rel_id_str
-
-
-def test_snapshot_model_id_computation():
-    snp_dict = snapshot_example.copy()
-    del snp_dict["id"]
-
-    snp_id_str = snapshot_identifier(snp_dict)
-    snp_id = hash_to_bytes(snp_id_str)
-    snp_model = Snapshot.from_dict(snp_dict)
-    assert snp_model.id == snp_id
-    assert str(snp_model.swhid()) == "swh:1:snp:" + snp_id_str
-
-
-def test_origin_model_id_computation():
-    ori_dict = origin_example.copy()
-
-    ori_id_str = origin_identifier(ori_dict)
-    ori_model = Origin.from_dict(ori_dict)
-    assert str(ori_model.swhid()) == "swh:1:ori:" + ori_id_str
-
-
-def test_raw_extrinsic_metadata_model_id_computation():
-    emd_dict = metadata_example.copy()
-
-    emd_id_str = raw_extrinsic_metadata_identifier(emd_dict)
-    emd_model = RawExtrinsicMetadata.from_dict(emd_dict)
-    assert str(emd_model.swhid()) == "swh:1:emd:" + emd_id_str
-
-
 @given(strategies.objects(split_content=True))
 def test_object_type(objtype_and_obj):
     obj_type, obj = objtype_and_obj
-- 
GitLab