From e2700570292655529fb004538bb816e87afb0ae4 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Fri, 31 May 2024 13:59:42 +0200
Subject: [PATCH] Introduce an object patching proxy feature in the
 MaskingProxy

This is an alternative to modifying the `person` table, as that
technique only worked on the PostgreSQL backend and not with Cassandra,
as it does not have a `person` table (authors and committers are inlined
in the revision and release tables).

This does not come with cli tooling to manage the display name table
entries for now.
---
 requirements-test.txt                         |   1 +
 swh/storage/proxies/masking/__init__.py       | 190 +++++++++-
 swh/storage/proxies/masking/db.py             |  50 ++-
 swh/storage/proxies/masking/sql/30-schema.sql |  13 +
 .../proxies/masking/sql/upgrades/194.sql      |  14 +
 swh/storage/tests/masking/conftest.py         |  18 +
 swh/storage/tests/masking/test_db.py          |  14 +
 swh/storage/tests/masking/test_proxy.py       |  23 +-
 .../tests/masking/test_proxy_masking.py       |  11 -
 ...o_masking.py => test_proxy_passthrough.py} |  20 +-
 .../tests/masking/test_proxy_patching.py      | 330 ++++++++++++++++++
 11 files changed, 627 insertions(+), 57 deletions(-)
 create mode 100644 swh/storage/proxies/masking/sql/upgrades/194.sql
 rename swh/storage/tests/masking/{test_proxy_no_masking.py => test_proxy_passthrough.py} (62%)
 create mode 100644 swh/storage/tests/masking/test_proxy_patching.py

diff --git a/requirements-test.txt b/requirements-test.txt
index d5f0ec703..3c72b7aa1 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -8,6 +8,7 @@ pytest-mock
 swh.core[testing] >= 3.0.0
 swh.model[testing] >= 6.13.0
 pytz
+pytest-postgresql
 pytest-redis
 pytest-xdist
 types-python-dateutil
diff --git a/swh/storage/proxies/masking/__init__.py b/swh/storage/proxies/masking/__init__.py
index d578e6a68..d36943c99 100644
--- a/swh/storage/proxies/masking/__init__.py
+++ b/swh/storage/proxies/masking/__init__.py
@@ -6,22 +6,35 @@
 from contextlib import contextmanager
 import functools
 import inspect
-from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Union
-
+import itertools
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    TypeVar,
+    Union,
+)
+
+import attr
 import psycopg2.pool
 
-from swh.core.api.classes import PagedResult
+from swh.core.utils import grouper
 from swh.model.hashutil import MultiHash
-from swh.model.model import Origin
+from swh.model.model import Origin, Person, Release, Revision, Sha1Git
 from swh.model.swhids import ExtendedObjectType, ExtendedSWHID
 from swh.storage import get_storage
 from swh.storage.exc import MaskedObjectException
-from swh.storage.interface import HashDict, Sha1, StorageInterface
+from swh.storage.interface import HashDict, PagedResult, Sha1, StorageInterface, TResult
 from swh.storage.metrics import DifferentialTimer
 from swh.storage.proxies.masking.db import MaskedStatus
 
 from .db import MaskingQuery
 
+BATCH_SIZE = 1024
 MASKING_OVERHEAD_METRIC = "swh_storage_masking_overhead_seconds"
 
 
@@ -192,7 +205,6 @@ class MaskingProxyStorage:
 
     def __getattr__(self, key):
         method = None
-
         if key in self._methods_by_name:
             method = self._methods_by_name[key](key)
         else:
@@ -233,7 +245,6 @@ class MaskingProxyStorage:
             "origin_get_by_sha1": self._getter_list,
             "content_find": self._getter_list,
             "skipped_content_find": self._getter_list,
-            "revision_log": self._getter_list,
             "revision_shortlog": self._getter_list,
             "extid_get_from_target": self._getter_list,
             "raw_extrinsic_metadata_get_by_ids": self._getter_list,
@@ -431,17 +442,19 @@ class MaskingProxyStorage:
         return newf
 
     def _raise_if_masked_result_in_list(
-        self, method_name: str, results: Iterable[Any]
-    ) -> None:
+        self, method_name: str, results: Iterable[TResult]
+    ) -> List[TResult]:
         """Raise a :exc:`MaskedObjectException` if any non-:const:`None` object
         in ``results`` is masked."""
         result_swhids = set()
+        results = list(results)
         for result in results:
             if result is not None:
                 result_swhids.update(self._get_swhids_in_result(method_name, result))
 
         if result_swhids:
             self._raise_if_masked_swhids(list(result_swhids))
+        return results
 
     def _getter_list(
         self,
@@ -481,3 +494,162 @@ class MaskingProxyStorage:
                 return results
 
         return newf
+
+    # Patching proxy feature set
+
+    TRevision = TypeVar("TRevision", Revision, Optional[Revision])
+
+    def _apply_revision_display_names(
+        self, revisions: List[TRevision]
+    ) -> List[TRevision]:
+        emails = set()
+        for rev in revisions:
+            if (
+                rev is not None
+                and rev.author is not None
+                and rev.author.email  # ignore None or empty email addresses
+            ):
+                emails.add(rev.author.email)
+            if (
+                rev is not None
+                and rev.committer is not None
+                and rev.committer.email  # ignore None or empty email addresses
+            ):
+                emails.add(rev.committer.email)
+
+        with self._masking_query() as q:
+            display_names = q.display_name(list(emails))
+
+        # Short path for the common case
+        if not display_names:
+            return revisions
+
+        persons: Dict[Optional[bytes], Person] = {
+            email: Person.from_fullname(display_name)
+            for (email, display_name) in display_names.items()
+        }
+
+        return [
+            None
+            if revision is None
+            else attr.evolve(
+                revision,
+                author=revision.author
+                if revision.author is None
+                else persons.get(revision.author.email, revision.author),
+                committer=revision.committer
+                if revision.committer is None
+                else persons.get(revision.committer.email, revision.committer),
+            )
+            for revision in revisions
+        ]
+
+    TRelease = TypeVar("TRelease", Release, Optional[Release])
+
+    def _apply_release_display_names(self, releases: List[TRelease]) -> List[TRelease]:
+        emails = set()
+        for rel in releases:
+            if (
+                rel is not None
+                and rel.author is not None
+                and rel.author.email  # ignore None or empty email addresses
+            ):
+                emails.add(rel.author.email)
+
+        with self._masking_query() as q:
+            display_names = q.display_name(list(emails))
+        # Short path for the common case
+        if not display_names:
+            return releases
+
+        persons: Dict[Optional[bytes], Person] = {
+            email: Person.from_fullname(display_name)
+            for (email, display_name) in display_names.items()
+        }
+
+        return [
+            None
+            if release is None
+            else attr.evolve(
+                release,
+                author=release.author
+                if release.author is None
+                else persons.get(release.author.email, release.author),
+            )
+            for release in releases
+        ]
+
+    def revision_get(
+        self, revision_ids: List[Sha1Git], ignore_displayname: bool = False
+    ) -> List[Optional[Revision]]:
+        revisions = self.storage.revision_get(revision_ids)
+        self._raise_if_masked_result_in_list("revision_get", revisions)
+        return self._apply_revision_display_names(revisions)
+
+    def revision_log(
+        self,
+        revisions: List[Sha1Git],
+        ignore_displayname: bool = False,
+        limit: Optional[int] = None,
+    ) -> Iterable[Optional[Dict[str, Any]]]:
+        revision_batches = grouper(
+            self.storage.revision_log(revisions, limit=limit), BATCH_SIZE
+        )
+        yield from map(
+            Revision.to_dict,
+            itertools.chain.from_iterable(
+                self._apply_revision_display_names(
+                    self._raise_if_masked_result_in_list(
+                        "revision_log", list(map(Revision.from_dict, revision_batch))
+                    )
+                )
+                for revision_batch in revision_batches
+            ),
+        )
+
+    def revision_get_partition(
+        self,
+        partition_id: int,
+        nb_partitions: int,
+        page_token: Optional[str] = None,
+        limit: int = 1000,
+    ) -> PagedResult[Revision]:
+        page: PagedResult[Revision] = self.storage.revision_get_partition(
+            partition_id, nb_partitions, page_token, limit
+        )
+        return PagedResult(
+            results=self._apply_revision_display_names(
+                self._raise_if_masked_result_in_list(
+                    "revision_get_parition", page.results
+                )
+            ),
+            next_page_token=page.next_page_token,
+        )
+
+    def release_get(
+        self, releases: List[Sha1Git], ignore_displayname: bool = False
+    ) -> List[Optional[Release]]:
+        return self._apply_release_display_names(
+            self._raise_if_masked_result_in_list(
+                "release_get", self.storage.release_get(releases)
+            )
+        )
+
+    def release_get_partition(
+        self,
+        partition_id: int,
+        nb_partitions: int,
+        page_token: Optional[str] = None,
+        limit: int = 1000,
+    ) -> PagedResult[Release]:
+        page = self.storage.release_get_partition(
+            partition_id, nb_partitions, page_token, limit
+        )
+        return PagedResult(
+            results=self._apply_release_display_names(
+                self._raise_if_masked_result_in_list(
+                    "release_get_partition", page.results
+                )
+            ),
+            next_page_token=page.next_page_token,
+        )
diff --git a/swh/storage/proxies/masking/db.py b/swh/storage/proxies/masking/db.py
index 9c28698dc..a3dff71af 100644
--- a/swh/storage/proxies/masking/db.py
+++ b/swh/storage/proxies/masking/db.py
@@ -88,9 +88,20 @@ class MaskedObject:
     state = attr.ib(type=MaskedState)
 
 
+@attr.s
+class DisplayName:
+    """A request for masking a set of objects"""
+
+    original_email = attr.ib(type=bytes)
+    """Email on revision/release objects to match before applying the display name"""
+
+    display_name = attr.ib(type=bytes)
+    """Full name, usually of the form ``Name <email>``, used for display queries"""
+
+
 class MaskingDb(BaseDb):
     # we started with 192, because this used to be part of the main storage db
-    current_version = 193
+    current_version = 194
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -329,6 +340,20 @@ class MaskingAdmin(MaskingDb):
 
         return MaskingRequestHistory(request=request_id, date=res[0], message=message)
 
+    def set_display_name(self, original_email: bytes, display_name: bytes) -> None:
+        """Updates the display name of the person identified by the email address."""
+        cur = self.cursor()
+
+        cur.execute(
+            """
+            INSERT INTO display_name (original_email, display_name)
+            VALUES (%s, %s)
+            ON CONFLICT (original_email) DO UPDATE
+            SET display_name=EXCLUDED.display_name
+            """,
+            (original_email, display_name),
+        )
+
     def get_history(self, request_id: UUID) -> List[MaskingRequestHistory]:
         """Get the history of a given request.
 
@@ -406,6 +431,29 @@ class MaskingQuery(MaskingDb):
             statsd.increment(METRIC_MASKED_TOTAL, len(ret))
         return ret
 
+    def display_name(self, original_emails: List[bytes]) -> Dict[bytes, bytes]:
+        """Returns the display name of the person identified by each ``original_email``,
+        if any.
+        """
+        cur = self.cursor()
+
+        ret: Dict[bytes, bytes] = {}
+
+        for original_email, display_name in psycopg2.extras.execute_values(
+            cur,
+            """
+            SELECT original_email, display_name
+            FROM display_name
+            INNER JOIN (VALUES %s) v(original_email)
+            USING (original_email)
+            """,
+            [(email,) for email in original_emails],
+            fetch=True,
+        ):
+            ret[original_email] = display_name
+
+        return ret
+
     def iter_masked_swhids(self) -> Iterator[Tuple[ExtendedSWHID, List[MaskedStatus]]]:
         """Returns the complete list of masked SWHIDs.
 
diff --git a/swh/storage/proxies/masking/sql/30-schema.sql b/swh/storage/proxies/masking/sql/30-schema.sql
index 373049d23..68f4bfd6b 100644
--- a/swh/storage/proxies/masking/sql/30-schema.sql
+++ b/swh/storage/proxies/masking/sql/30-schema.sql
@@ -41,3 +41,16 @@ comment on column masked_object.object_id is 'The object_id part of the object''
 comment on column masked_object.object_type is 'The object_type part of the object''s SWHID';
 comment on column masked_object.request is 'Reference to the affecting request';
 comment on column masked_object.state is 'The degree to which the object is masked as a result of the request';
+
+
+
+-- Used only by the patching proxy, not the masking proxy
+
+create table if not exists display_name (
+  original_email bytea not null primary key,
+  display_name bytea not null
+);
+
+comment on table display_name is 'Map from revision/release email to current full name';
+comment on column display_name.original_email is 'Email on revision/release objects to match before applying the display name';
+comment on column display_name.display_name is 'Full name, usually of the form `Name <email>, used for display queries';
diff --git a/swh/storage/proxies/masking/sql/upgrades/194.sql b/swh/storage/proxies/masking/sql/upgrades/194.sql
new file mode 100644
index 000000000..b9c0807de
--- /dev/null
+++ b/swh/storage/proxies/masking/sql/upgrades/194.sql
@@ -0,0 +1,14 @@
+--
+-- SWH Masking Proxy DB schema upgrade
+-- from_version: 193
+-- to_version: 194
+-- description: Add the patching proxy display_name table
+
+create table if not exists display_name (
+  original_email bytea not null primary key,
+  display_name bytea not null
+);
+
+comment on table display_name is 'Map from revision/release email to current full name';
+comment on column display_name.original_email is 'Email on revision/release objects to match before applying the display name';
+comment on column display_name.display_name is 'Full name, usually of the form `Name <email>, used for display queries';
diff --git a/swh/storage/tests/masking/conftest.py b/swh/storage/tests/masking/conftest.py
index 6bddd9e53..07916e5a5 100644
--- a/swh/storage/tests/masking/conftest.py
+++ b/swh/storage/tests/masking/conftest.py
@@ -9,6 +9,7 @@ import pytest
 from pytest_postgresql import factories
 
 from swh.core.db.db_utils import initialize_database_for_module
+from swh.storage.proxies.masking import MaskingProxyStorage
 from swh.storage.proxies.masking.db import MaskingAdmin, MaskingQuery
 
 masking_db_postgresql_proc = factories.postgresql_proc(
@@ -35,3 +36,20 @@ def masking_admin(masking_db_postgresql) -> MaskingAdmin:
 @pytest.fixture
 def masking_query(masking_db_postgresql) -> MaskingQuery:
     return MaskingQuery.connect(masking_db_postgresql.info.dsn)
+
+
+@pytest.fixture
+def swh_storage_backend_config():
+    yield {
+        "cls": "memory",
+        "journal_writer": {
+            "cls": "memory",
+        },
+    }
+
+
+@pytest.fixture
+def swh_storage(masking_db_postgresql, swh_storage_backend):
+    return MaskingProxyStorage(
+        masking_db=masking_db_postgresql.info.dsn, storage=swh_storage_backend
+    )
diff --git a/swh/storage/tests/masking/test_db.py b/swh/storage/tests/masking/test_db.py
index 09f95ceba..5dc050b33 100644
--- a/swh/storage/tests/masking/test_db.py
+++ b/swh/storage/tests/masking/test_db.py
@@ -255,6 +255,20 @@ def test_swhid_lifecycle(masking_admin: MaskingAdmin, masking_query: MaskingQuer
     assert dict(masking_query.iter_masked_swhids()) == expected
 
 
+def test_set_display_name(masking_admin: MaskingAdmin, masking_query: MaskingQuery):
+    assert masking_query.display_name([b"author1@example.com"]) == {}
+    assert masking_query.display_name([b"author2@example.com"]) == {}
+
+    masking_admin.set_display_name(
+        b"author1@example.com", b"author2 <author2@example.com>"
+    )
+
+    assert masking_query.display_name([b"author1@example.com"]) == {
+        b"author1@example.com": b"author2 <author2@example.com>"
+    }
+    assert masking_query.display_name([b"author2@example.com"]) == {}
+
+
 def test_query_metrics(
     masking_admin: MaskingAdmin, masking_query: MaskingQuery, mocker
 ):
diff --git a/swh/storage/tests/masking/test_proxy.py b/swh/storage/tests/masking/test_proxy.py
index 4cd9f5d81..6db1f2e38 100644
--- a/swh/storage/tests/masking/test_proxy.py
+++ b/swh/storage/tests/masking/test_proxy.py
@@ -36,23 +36,6 @@ def now() -> datetime.datetime:
     return datetime.datetime.now(tz=datetime.timezone.utc)
 
 
-@pytest.fixture
-def swh_storage_backend_config():
-    return {
-        "cls": "memory",
-        "journal_writer": {
-            "cls": "memory",
-        },
-    }
-
-
-@pytest.fixture
-def swh_storage(masking_db_postgresql, swh_storage_backend):
-    return MaskingProxyStorage(
-        masking_db=masking_db_postgresql.info.dsn, storage=swh_storage_backend
-    )
-
-
 @pytest.fixture
 def set_object_visibility(masking_admin):
     # Create a request
@@ -404,7 +387,7 @@ def test_revision_log(swh_storage, set_object_visibility):
 
     # But the parent is properly masked
     assert_masked_objects_raise(
-        lambda: swh_storage.revision_log([StorageData.revision2.id], limit=2),
+        lambda: list(swh_storage.revision_log([StorageData.revision2.id], limit=2)),
         [StorageData.revision.swhid().to_extended()],
         set_object_visibility,
     )
@@ -426,7 +409,9 @@ def test_revision_shortlog(swh_storage, set_object_visibility):
 
     # But the parent is properly masked
     assert_masked_objects_raise(
-        lambda: swh_storage.revision_shortlog([StorageData.revision2.id], limit=2),
+        lambda: list(
+            swh_storage.revision_shortlog([StorageData.revision2.id], limit=2)
+        ),
         [StorageData.revision.swhid().to_extended()],
         set_object_visibility,
     )
diff --git a/swh/storage/tests/masking/test_proxy_masking.py b/swh/storage/tests/masking/test_proxy_masking.py
index 82d72b836..0fae13a17 100644
--- a/swh/storage/tests/masking/test_proxy_masking.py
+++ b/swh/storage/tests/masking/test_proxy_masking.py
@@ -13,17 +13,6 @@ from swh.storage.proxies.masking.db import MaskedState
 from swh.storage.tests.storage_data import StorageData
 from swh.storage.tests.test_in_memory import TestInMemoryStorage as _TestStorage
 
-
-@pytest.fixture
-def swh_storage_backend_config():
-    yield {
-        "cls": "memory",
-        "journal_writer": {
-            "cls": "memory",
-        },
-    }
-
-
 MASKED_SWHIDS = {
     StorageData.content.swhid().to_extended(),
     StorageData.directory.swhid().to_extended(),
diff --git a/swh/storage/tests/masking/test_proxy_no_masking.py b/swh/storage/tests/masking/test_proxy_passthrough.py
similarity index 62%
rename from swh/storage/tests/masking/test_proxy_no_masking.py
rename to swh/storage/tests/masking/test_proxy_passthrough.py
index 0e8c1a112..ad7fadf0d 100644
--- a/swh/storage/tests/masking/test_proxy_no_masking.py
+++ b/swh/storage/tests/masking/test_proxy_passthrough.py
@@ -10,24 +10,10 @@ from swh.storage.proxies.masking import MaskingProxyStorage
 from swh.storage.tests.test_in_memory import TestInMemoryStorage as _TestStorage
 
 
-@pytest.fixture
-def swh_storage_backend_config():
-    yield {
-        "cls": "memory",
-        "journal_writer": {
-            "cls": "memory",
-        },
-    }
-
-
-@pytest.fixture
-def swh_storage(masking_db_postgresql, swh_storage_backend):
-    return MaskingProxyStorage(
-        masking_db=masking_db_postgresql.info.dsn, storage=swh_storage_backend
-    )
-
-
 class TestStorage(_TestStorage):
     @pytest.mark.xfail(reason="typing.Protocol instance check is annoying")
     def test_types(self, *args, **kwargs):
         super().test_types(*args, **kwargs)
+
+    def test_storage_type(self, swh_storage):
+        assert isinstance(swh_storage, MaskingProxyStorage)
diff --git a/swh/storage/tests/masking/test_proxy_patching.py b/swh/storage/tests/masking/test_proxy_patching.py
new file mode 100644
index 000000000..0a7f569ee
--- /dev/null
+++ b/swh/storage/tests/masking/test_proxy_patching.py
@@ -0,0 +1,330 @@
+# Copyright (C) 2024 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import attr
+
+from swh.core.api.classes import stream_results
+from swh.model.model import Person
+from swh.storage.proxies.masking.db import MaskingAdmin
+from swh.storage.tests.storage_data import StorageData
+
+
+def test_revision_get(swh_storage, masking_admin: MaskingAdmin):
+    revision = StorageData.revision
+    revision2 = StorageData.revision2
+    revision3 = StorageData.revision4  # StorageData.rev3 has same author/committer as 2
+
+    assert (
+        revision.author is not None
+        and revision.author.email is not None
+        and revision2.committer is not None
+        and revision2.committer.email is not None
+    )  # for mypy
+
+    swh_storage.revision_add([revision, revision2, revision3])
+    assert swh_storage.revision_get([revision.id, revision2.id, revision3.id]) == [
+        revision,
+        revision2,
+        revision3,
+    ]
+
+    display_name1 = b"New Author <new-author1@example.org>"
+    display_name2 = b"New Author <new-author2@example.org>"
+    masking_admin.set_display_name(revision.author.email, display_name1)
+    masking_admin.set_display_name(revision2.committer.email, display_name2)
+
+    assert swh_storage.revision_get([revision.id, revision2.id, revision3.id]) == [
+        attr.evolve(revision, author=Person.from_fullname(display_name1)),
+        attr.evolve(revision2, committer=Person.from_fullname(display_name2)),
+        revision3,
+    ]
+
+
+def test_revision_get_none_author(swh_storage, masking_admin: MaskingAdmin):
+    revision = attr.evolve(
+        StorageData.revision,
+        author=None,
+        date=None,
+        committer=None,
+        committer_date=None,
+    )
+    revision2 = StorageData.revision2
+    assert revision2.author is not None and revision2.committer is not None  # for mypy
+    revision2 = attr.evolve(
+        revision2,
+        author=attr.evolve(revision2.author, email=None),
+        committer=attr.evolve(revision2.committer, email=None),
+    )
+
+    swh_storage.revision_add([revision, revision2])
+    assert swh_storage.revision_get([revision.id, revision2.id]) == [
+        revision,
+        revision2,
+    ]
+
+    display_name = b"New Author <new-author@example.org>"
+    masking_admin.set_display_name(b"old-author@example.org", display_name)
+
+    assert swh_storage.revision_get([revision.id, revision2.id]) == [
+        revision,
+        revision2,
+    ]
+
+
+def test_revision_log(swh_storage, masking_admin: MaskingAdmin):
+    revision = StorageData.revision
+    revision2 = StorageData.revision2
+    revision3 = StorageData.revision3
+    revision4 = StorageData.revision4
+
+    assert (
+        revision.author is not None
+        and revision.author.email is not None
+        and revision2.committer is not None
+        and revision2.committer.email is not None
+    )  # for mypy
+
+    swh_storage.revision_add([revision, revision2, revision3, revision4])
+    assert list(swh_storage.revision_log([revision3.id, revision4.id])) == [
+        revision3.to_dict(),
+        revision.to_dict(),
+        revision2.to_dict(),
+        revision4.to_dict(),
+    ]
+
+    display_name1 = b"New Author <new-author1@example.org>"
+    display_name2 = b"New Author <new-author2@example.org>"
+    masking_admin.set_display_name(revision.author.email, display_name1)
+    masking_admin.set_display_name(revision2.committer.email, display_name2)
+
+    assert list(swh_storage.revision_log([revision3.id, revision4.id])) == [
+        attr.evolve(revision3, committer=Person.from_fullname(display_name2)).to_dict(),
+        attr.evolve(revision, author=Person.from_fullname(display_name1)).to_dict(),
+        attr.evolve(revision2, committer=Person.from_fullname(display_name2)).to_dict(),
+        revision4.to_dict(),
+    ]
+
+
+def test_revision_log_none_author(swh_storage, masking_admin: MaskingAdmin):
+    revision = attr.evolve(
+        StorageData.revision,
+        author=None,
+        date=None,
+        committer=None,
+        committer_date=None,
+    )
+    revision2 = StorageData.revision2
+    revision3 = StorageData.revision3
+    assert revision2.author is not None and revision2.committer is not None  # for mypy
+    revision2 = attr.evolve(
+        revision2,
+        author=attr.evolve(revision2.author, email=None),
+        committer=attr.evolve(revision2.committer, email=None),
+    )
+
+    swh_storage.revision_add([revision, revision2, revision3])
+    assert list(swh_storage.revision_log([revision3.id])) == [
+        revision3.to_dict(),
+        revision.to_dict(),
+        revision2.to_dict(),
+    ]
+
+    display_name = b"New Author <new-author@example.org>"
+    masking_admin.set_display_name(b"old-author@example.org", display_name)
+
+    assert list(swh_storage.revision_log([revision3.id])) == [
+        revision3.to_dict(),
+        revision.to_dict(),
+        revision2.to_dict(),
+    ]
+
+
+def test_revision_get_partition(swh_storage, masking_admin: MaskingAdmin):
+    revision = attr.evolve(StorageData.revision, metadata=None)
+    revision2 = StorageData.revision2
+    revision3 = StorageData.revision4  # StorageData.rev3 has same author/committer as 2
+
+    assert (
+        revision.author is not None
+        and revision.author.email is not None
+        and revision2.committer is not None
+        and revision2.committer.email is not None
+    )  # for mypy
+
+    swh_storage.revision_add([revision, revision2, revision3])
+    assert set(
+        stream_results(
+            swh_storage.revision_get_partition, partition_id=0, nb_partitions=1
+        )
+    ) == {
+        revision,
+        revision2,
+        revision3,
+    }
+
+    display_name1 = b"New Author <new-author1@example.org>"
+    display_name2 = b"New Author <new-author2@example.org>"
+    masking_admin.set_display_name(revision.author.email, display_name1)
+    masking_admin.set_display_name(revision2.committer.email, display_name2)
+
+    assert set(
+        stream_results(
+            swh_storage.revision_get_partition, partition_id=0, nb_partitions=1
+        )
+    ) == {
+        attr.evolve(revision, author=Person.from_fullname(display_name1)),
+        attr.evolve(revision2, committer=Person.from_fullname(display_name2)),
+        revision3,
+    }
+
+
+def test_revision_get_partition_none_author(swh_storage, masking_admin: MaskingAdmin):
+    revision = attr.evolve(
+        StorageData.revision,
+        author=None,
+        date=None,
+        committer=None,
+        committer_date=None,
+        metadata=None,
+    )
+    revision2 = StorageData.revision2
+    assert revision2.author is not None and revision2.committer is not None  # for mypy
+    revision2 = attr.evolve(
+        revision2,
+        author=attr.evolve(revision2.author, email=None),
+        committer=attr.evolve(revision2.committer, email=None),
+    )
+
+    swh_storage.revision_add([revision, revision2])
+    assert set(
+        stream_results(
+            swh_storage.revision_get_partition, partition_id=0, nb_partitions=1
+        )
+    ) == {
+        revision,
+        revision2,
+    }
+
+    display_name = b"New Author <new-author@example.org>"
+    masking_admin.set_display_name(b"old-author@example.org", display_name)
+
+    assert set(
+        stream_results(
+            swh_storage.revision_get_partition, partition_id=0, nb_partitions=1
+        )
+    ) == {
+        revision,
+        revision2,
+    }
+
+
+############
+
+
+def test_release_get(swh_storage, masking_admin: MaskingAdmin):
+    release = StorageData.release
+    release2 = StorageData.release2
+
+    assert release.author is not None and release.author.email is not None  # for mypy
+
+    swh_storage.release_add([release, release2])
+    assert swh_storage.release_get([release.id, release2.id]) == [
+        release,
+        release2,
+    ]
+
+    display_name1 = b"New Author <new-author1@example.org>"
+    masking_admin.set_display_name(release.author.email, display_name1)
+
+    assert swh_storage.release_get([release.id, release2.id]) == [
+        attr.evolve(release, author=Person.from_fullname(display_name1)),
+        release2,
+    ]
+
+
+def test_release_get_none_author(swh_storage, masking_admin: MaskingAdmin):
+    release = attr.evolve(
+        StorageData.release,
+        author=None,
+        date=None,
+    )
+    release2 = StorageData.release2
+    assert release2.author is not None  # for mypy
+    release2 = attr.evolve(
+        release2,
+        author=attr.evolve(release2.author, email=None),
+    )
+
+    swh_storage.release_add([release, release2])
+    assert swh_storage.release_get([release.id, release2.id]) == [
+        release,
+        release2,
+    ]
+
+    display_name = b"New Author <new-author@example.org>"
+    masking_admin.set_display_name(b"old-author@example.org", display_name)
+
+    assert swh_storage.release_get([release.id, release2.id]) == [
+        release,
+        release2,
+    ]
+
+
+def test_release_get_partition(swh_storage, masking_admin: MaskingAdmin):
+    release = attr.evolve(StorageData.release, metadata=None)
+    release2 = StorageData.release2
+
+    assert release.author is not None and release.author.email is not None  # for mypy
+
+    swh_storage.release_add([release, release2])
+    assert set(
+        stream_results(
+            swh_storage.release_get_partition, partition_id=0, nb_partitions=1
+        )
+    ) == {
+        release,
+        release2,
+    }
+
+    display_name1 = b"New Author <new-author1@example.org>"
+    masking_admin.set_display_name(release.author.email, display_name1)
+
+    assert set(
+        stream_results(
+            swh_storage.release_get_partition, partition_id=0, nb_partitions=1
+        )
+    ) == {
+        attr.evolve(release, author=Person.from_fullname(display_name1)),
+        release2,
+    }
+
+
+def test_release_get_partition_none_author(swh_storage, masking_admin: MaskingAdmin):
+    release = attr.evolve(
+        StorageData.release,
+        author=None,
+        date=None,
+        metadata=None,
+    )
+
+    swh_storage.release_add([release])
+    assert set(
+        stream_results(
+            swh_storage.release_get_partition, partition_id=0, nb_partitions=1
+        )
+    ) == {
+        release,
+    }
+
+    display_name = b"New Author <new-author@example.org>"
+    masking_admin.set_display_name(b"old-author@example.org", display_name)
+
+    assert set(
+        stream_results(
+            swh_storage.release_get_partition, partition_id=0, nb_partitions=1
+        )
+    ) == {
+        release,
+    }
-- 
GitLab