From e2700570292655529fb004538bb816e87afb0ae4 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <vlorentz@softwareheritage.org> Date: Fri, 31 May 2024 13:59:42 +0200 Subject: [PATCH] Introduce an object patching proxy feature in the MaskingProxy This is an alternative to modifying the `person` table, as that technique only worked on the PostgreSQL backend and not with Cassandra, as it does not have a `person` table (authors and committers are inlined in the revision and release tables). This does not come with cli tooling to manage the display name table entries for now. --- requirements-test.txt | 1 + swh/storage/proxies/masking/__init__.py | 190 +++++++++- swh/storage/proxies/masking/db.py | 50 ++- swh/storage/proxies/masking/sql/30-schema.sql | 13 + .../proxies/masking/sql/upgrades/194.sql | 14 + swh/storage/tests/masking/conftest.py | 18 + swh/storage/tests/masking/test_db.py | 14 + swh/storage/tests/masking/test_proxy.py | 23 +- .../tests/masking/test_proxy_masking.py | 11 - ...o_masking.py => test_proxy_passthrough.py} | 20 +- .../tests/masking/test_proxy_patching.py | 330 ++++++++++++++++++ 11 files changed, 627 insertions(+), 57 deletions(-) create mode 100644 swh/storage/proxies/masking/sql/upgrades/194.sql rename swh/storage/tests/masking/{test_proxy_no_masking.py => test_proxy_passthrough.py} (62%) create mode 100644 swh/storage/tests/masking/test_proxy_patching.py diff --git a/requirements-test.txt b/requirements-test.txt index d5f0ec703..3c72b7aa1 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -8,6 +8,7 @@ pytest-mock swh.core[testing] >= 3.0.0 swh.model[testing] >= 6.13.0 pytz +pytest-postgresql pytest-redis pytest-xdist types-python-dateutil diff --git a/swh/storage/proxies/masking/__init__.py b/swh/storage/proxies/masking/__init__.py index d578e6a68..d36943c99 100644 --- a/swh/storage/proxies/masking/__init__.py +++ b/swh/storage/proxies/masking/__init__.py @@ -6,22 +6,35 @@ from contextlib import contextmanager import functools import inspect -from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Union - +import itertools +from typing import ( + Any, + Callable, + Dict, + Iterable, + Iterator, + List, + Optional, + TypeVar, + Union, +) + +import attr import psycopg2.pool -from swh.core.api.classes import PagedResult +from swh.core.utils import grouper from swh.model.hashutil import MultiHash -from swh.model.model import Origin +from swh.model.model import Origin, Person, Release, Revision, Sha1Git from swh.model.swhids import ExtendedObjectType, ExtendedSWHID from swh.storage import get_storage from swh.storage.exc import MaskedObjectException -from swh.storage.interface import HashDict, Sha1, StorageInterface +from swh.storage.interface import HashDict, PagedResult, Sha1, StorageInterface, TResult from swh.storage.metrics import DifferentialTimer from swh.storage.proxies.masking.db import MaskedStatus from .db import MaskingQuery +BATCH_SIZE = 1024 MASKING_OVERHEAD_METRIC = "swh_storage_masking_overhead_seconds" @@ -192,7 +205,6 @@ class MaskingProxyStorage: def __getattr__(self, key): method = None - if key in self._methods_by_name: method = self._methods_by_name[key](key) else: @@ -233,7 +245,6 @@ class MaskingProxyStorage: "origin_get_by_sha1": self._getter_list, "content_find": self._getter_list, "skipped_content_find": self._getter_list, - "revision_log": self._getter_list, "revision_shortlog": self._getter_list, "extid_get_from_target": self._getter_list, "raw_extrinsic_metadata_get_by_ids": self._getter_list, @@ -431,17 +442,19 @@ class MaskingProxyStorage: return newf def _raise_if_masked_result_in_list( - self, method_name: str, results: Iterable[Any] - ) -> None: + self, method_name: str, results: Iterable[TResult] + ) -> List[TResult]: """Raise a :exc:`MaskedObjectException` if any non-:const:`None` object in ``results`` is masked.""" result_swhids = set() + results = list(results) for result in results: if result is not None: result_swhids.update(self._get_swhids_in_result(method_name, result)) if result_swhids: self._raise_if_masked_swhids(list(result_swhids)) + return results def _getter_list( self, @@ -481,3 +494,162 @@ class MaskingProxyStorage: return results return newf + + # Patching proxy feature set + + TRevision = TypeVar("TRevision", Revision, Optional[Revision]) + + def _apply_revision_display_names( + self, revisions: List[TRevision] + ) -> List[TRevision]: + emails = set() + for rev in revisions: + if ( + rev is not None + and rev.author is not None + and rev.author.email # ignore None or empty email addresses + ): + emails.add(rev.author.email) + if ( + rev is not None + and rev.committer is not None + and rev.committer.email # ignore None or empty email addresses + ): + emails.add(rev.committer.email) + + with self._masking_query() as q: + display_names = q.display_name(list(emails)) + + # Short path for the common case + if not display_names: + return revisions + + persons: Dict[Optional[bytes], Person] = { + email: Person.from_fullname(display_name) + for (email, display_name) in display_names.items() + } + + return [ + None + if revision is None + else attr.evolve( + revision, + author=revision.author + if revision.author is None + else persons.get(revision.author.email, revision.author), + committer=revision.committer + if revision.committer is None + else persons.get(revision.committer.email, revision.committer), + ) + for revision in revisions + ] + + TRelease = TypeVar("TRelease", Release, Optional[Release]) + + def _apply_release_display_names(self, releases: List[TRelease]) -> List[TRelease]: + emails = set() + for rel in releases: + if ( + rel is not None + and rel.author is not None + and rel.author.email # ignore None or empty email addresses + ): + emails.add(rel.author.email) + + with self._masking_query() as q: + display_names = q.display_name(list(emails)) + # Short path for the common case + if not display_names: + return releases + + persons: Dict[Optional[bytes], Person] = { + email: Person.from_fullname(display_name) + for (email, display_name) in display_names.items() + } + + return [ + None + if release is None + else attr.evolve( + release, + author=release.author + if release.author is None + else persons.get(release.author.email, release.author), + ) + for release in releases + ] + + def revision_get( + self, revision_ids: List[Sha1Git], ignore_displayname: bool = False + ) -> List[Optional[Revision]]: + revisions = self.storage.revision_get(revision_ids) + self._raise_if_masked_result_in_list("revision_get", revisions) + return self._apply_revision_display_names(revisions) + + def revision_log( + self, + revisions: List[Sha1Git], + ignore_displayname: bool = False, + limit: Optional[int] = None, + ) -> Iterable[Optional[Dict[str, Any]]]: + revision_batches = grouper( + self.storage.revision_log(revisions, limit=limit), BATCH_SIZE + ) + yield from map( + Revision.to_dict, + itertools.chain.from_iterable( + self._apply_revision_display_names( + self._raise_if_masked_result_in_list( + "revision_log", list(map(Revision.from_dict, revision_batch)) + ) + ) + for revision_batch in revision_batches + ), + ) + + def revision_get_partition( + self, + partition_id: int, + nb_partitions: int, + page_token: Optional[str] = None, + limit: int = 1000, + ) -> PagedResult[Revision]: + page: PagedResult[Revision] = self.storage.revision_get_partition( + partition_id, nb_partitions, page_token, limit + ) + return PagedResult( + results=self._apply_revision_display_names( + self._raise_if_masked_result_in_list( + "revision_get_parition", page.results + ) + ), + next_page_token=page.next_page_token, + ) + + def release_get( + self, releases: List[Sha1Git], ignore_displayname: bool = False + ) -> List[Optional[Release]]: + return self._apply_release_display_names( + self._raise_if_masked_result_in_list( + "release_get", self.storage.release_get(releases) + ) + ) + + def release_get_partition( + self, + partition_id: int, + nb_partitions: int, + page_token: Optional[str] = None, + limit: int = 1000, + ) -> PagedResult[Release]: + page = self.storage.release_get_partition( + partition_id, nb_partitions, page_token, limit + ) + return PagedResult( + results=self._apply_release_display_names( + self._raise_if_masked_result_in_list( + "release_get_partition", page.results + ) + ), + next_page_token=page.next_page_token, + ) diff --git a/swh/storage/proxies/masking/db.py b/swh/storage/proxies/masking/db.py index 9c28698dc..a3dff71af 100644 --- a/swh/storage/proxies/masking/db.py +++ b/swh/storage/proxies/masking/db.py @@ -88,9 +88,20 @@ class MaskedObject: state = attr.ib(type=MaskedState) +@attr.s +class DisplayName: + """A request for masking a set of objects""" + + original_email = attr.ib(type=bytes) + """Email on revision/release objects to match before applying the display name""" + + display_name = attr.ib(type=bytes) + """Full name, usually of the form ``Name <email>``, used for display queries""" + + class MaskingDb(BaseDb): # we started with 192, because this used to be part of the main storage db - current_version = 193 + current_version = 194 def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -329,6 +340,20 @@ class MaskingAdmin(MaskingDb): return MaskingRequestHistory(request=request_id, date=res[0], message=message) + def set_display_name(self, original_email: bytes, display_name: bytes) -> None: + """Updates the display name of the person identified by the email address.""" + cur = self.cursor() + + cur.execute( + """ + INSERT INTO display_name (original_email, display_name) + VALUES (%s, %s) + ON CONFLICT (original_email) DO UPDATE + SET display_name=EXCLUDED.display_name + """, + (original_email, display_name), + ) + def get_history(self, request_id: UUID) -> List[MaskingRequestHistory]: """Get the history of a given request. @@ -406,6 +431,29 @@ class MaskingQuery(MaskingDb): statsd.increment(METRIC_MASKED_TOTAL, len(ret)) return ret + def display_name(self, original_emails: List[bytes]) -> Dict[bytes, bytes]: + """Returns the display name of the person identified by each ``original_email``, + if any. + """ + cur = self.cursor() + + ret: Dict[bytes, bytes] = {} + + for original_email, display_name in psycopg2.extras.execute_values( + cur, + """ + SELECT original_email, display_name + FROM display_name + INNER JOIN (VALUES %s) v(original_email) + USING (original_email) + """, + [(email,) for email in original_emails], + fetch=True, + ): + ret[original_email] = display_name + + return ret + def iter_masked_swhids(self) -> Iterator[Tuple[ExtendedSWHID, List[MaskedStatus]]]: """Returns the complete list of masked SWHIDs. diff --git a/swh/storage/proxies/masking/sql/30-schema.sql b/swh/storage/proxies/masking/sql/30-schema.sql index 373049d23..68f4bfd6b 100644 --- a/swh/storage/proxies/masking/sql/30-schema.sql +++ b/swh/storage/proxies/masking/sql/30-schema.sql @@ -41,3 +41,16 @@ comment on column masked_object.object_id is 'The object_id part of the object'' comment on column masked_object.object_type is 'The object_type part of the object''s SWHID'; comment on column masked_object.request is 'Reference to the affecting request'; comment on column masked_object.state is 'The degree to which the object is masked as a result of the request'; + + + +-- Used only by the patching proxy, not the masking proxy + +create table if not exists display_name ( + original_email bytea not null primary key, + display_name bytea not null +); + +comment on table display_name is 'Map from revision/release email to current full name'; +comment on column display_name.original_email is 'Email on revision/release objects to match before applying the display name'; +comment on column display_name.display_name is 'Full name, usually of the form `Name <email>, used for display queries'; diff --git a/swh/storage/proxies/masking/sql/upgrades/194.sql b/swh/storage/proxies/masking/sql/upgrades/194.sql new file mode 100644 index 000000000..b9c0807de --- /dev/null +++ b/swh/storage/proxies/masking/sql/upgrades/194.sql @@ -0,0 +1,14 @@ +-- +-- SWH Masking Proxy DB schema upgrade +-- from_version: 193 +-- to_version: 194 +-- description: Add the patching proxy display_name table + +create table if not exists display_name ( + original_email bytea not null primary key, + display_name bytea not null +); + +comment on table display_name is 'Map from revision/release email to current full name'; +comment on column display_name.original_email is 'Email on revision/release objects to match before applying the display name'; +comment on column display_name.display_name is 'Full name, usually of the form `Name <email>, used for display queries'; diff --git a/swh/storage/tests/masking/conftest.py b/swh/storage/tests/masking/conftest.py index 6bddd9e53..07916e5a5 100644 --- a/swh/storage/tests/masking/conftest.py +++ b/swh/storage/tests/masking/conftest.py @@ -9,6 +9,7 @@ import pytest from pytest_postgresql import factories from swh.core.db.db_utils import initialize_database_for_module +from swh.storage.proxies.masking import MaskingProxyStorage from swh.storage.proxies.masking.db import MaskingAdmin, MaskingQuery masking_db_postgresql_proc = factories.postgresql_proc( @@ -35,3 +36,20 @@ def masking_admin(masking_db_postgresql) -> MaskingAdmin: @pytest.fixture def masking_query(masking_db_postgresql) -> MaskingQuery: return MaskingQuery.connect(masking_db_postgresql.info.dsn) + + +@pytest.fixture +def swh_storage_backend_config(): + yield { + "cls": "memory", + "journal_writer": { + "cls": "memory", + }, + } + + +@pytest.fixture +def swh_storage(masking_db_postgresql, swh_storage_backend): + return MaskingProxyStorage( + masking_db=masking_db_postgresql.info.dsn, storage=swh_storage_backend + ) diff --git a/swh/storage/tests/masking/test_db.py b/swh/storage/tests/masking/test_db.py index 09f95ceba..5dc050b33 100644 --- a/swh/storage/tests/masking/test_db.py +++ b/swh/storage/tests/masking/test_db.py @@ -255,6 +255,20 @@ def test_swhid_lifecycle(masking_admin: MaskingAdmin, masking_query: MaskingQuer assert dict(masking_query.iter_masked_swhids()) == expected +def test_set_display_name(masking_admin: MaskingAdmin, masking_query: MaskingQuery): + assert masking_query.display_name([b"author1@example.com"]) == {} + assert masking_query.display_name([b"author2@example.com"]) == {} + + masking_admin.set_display_name( + b"author1@example.com", b"author2 <author2@example.com>" + ) + + assert masking_query.display_name([b"author1@example.com"]) == { + b"author1@example.com": b"author2 <author2@example.com>" + } + assert masking_query.display_name([b"author2@example.com"]) == {} + + def test_query_metrics( masking_admin: MaskingAdmin, masking_query: MaskingQuery, mocker ): diff --git a/swh/storage/tests/masking/test_proxy.py b/swh/storage/tests/masking/test_proxy.py index 4cd9f5d81..6db1f2e38 100644 --- a/swh/storage/tests/masking/test_proxy.py +++ b/swh/storage/tests/masking/test_proxy.py @@ -36,23 +36,6 @@ def now() -> datetime.datetime: return datetime.datetime.now(tz=datetime.timezone.utc) -@pytest.fixture -def swh_storage_backend_config(): - return { - "cls": "memory", - "journal_writer": { - "cls": "memory", - }, - } - - -@pytest.fixture -def swh_storage(masking_db_postgresql, swh_storage_backend): - return MaskingProxyStorage( - masking_db=masking_db_postgresql.info.dsn, storage=swh_storage_backend - ) - - @pytest.fixture def set_object_visibility(masking_admin): # Create a request @@ -404,7 +387,7 @@ def test_revision_log(swh_storage, set_object_visibility): # But the parent is properly masked assert_masked_objects_raise( - lambda: swh_storage.revision_log([StorageData.revision2.id], limit=2), + lambda: list(swh_storage.revision_log([StorageData.revision2.id], limit=2)), [StorageData.revision.swhid().to_extended()], set_object_visibility, ) @@ -426,7 +409,9 @@ def test_revision_shortlog(swh_storage, set_object_visibility): # But the parent is properly masked assert_masked_objects_raise( - lambda: swh_storage.revision_shortlog([StorageData.revision2.id], limit=2), + lambda: list( + swh_storage.revision_shortlog([StorageData.revision2.id], limit=2) + ), [StorageData.revision.swhid().to_extended()], set_object_visibility, ) diff --git a/swh/storage/tests/masking/test_proxy_masking.py b/swh/storage/tests/masking/test_proxy_masking.py index 82d72b836..0fae13a17 100644 --- a/swh/storage/tests/masking/test_proxy_masking.py +++ b/swh/storage/tests/masking/test_proxy_masking.py @@ -13,17 +13,6 @@ from swh.storage.proxies.masking.db import MaskedState from swh.storage.tests.storage_data import StorageData from swh.storage.tests.test_in_memory import TestInMemoryStorage as _TestStorage - -@pytest.fixture -def swh_storage_backend_config(): - yield { - "cls": "memory", - "journal_writer": { - "cls": "memory", - }, - } - - MASKED_SWHIDS = { StorageData.content.swhid().to_extended(), StorageData.directory.swhid().to_extended(), diff --git a/swh/storage/tests/masking/test_proxy_no_masking.py b/swh/storage/tests/masking/test_proxy_passthrough.py similarity index 62% rename from swh/storage/tests/masking/test_proxy_no_masking.py rename to swh/storage/tests/masking/test_proxy_passthrough.py index 0e8c1a112..ad7fadf0d 100644 --- a/swh/storage/tests/masking/test_proxy_no_masking.py +++ b/swh/storage/tests/masking/test_proxy_passthrough.py @@ -10,24 +10,10 @@ from swh.storage.proxies.masking import MaskingProxyStorage from swh.storage.tests.test_in_memory import TestInMemoryStorage as _TestStorage -@pytest.fixture -def swh_storage_backend_config(): - yield { - "cls": "memory", - "journal_writer": { - "cls": "memory", - }, - } - - -@pytest.fixture -def swh_storage(masking_db_postgresql, swh_storage_backend): - return MaskingProxyStorage( - masking_db=masking_db_postgresql.info.dsn, storage=swh_storage_backend - ) - - class TestStorage(_TestStorage): @pytest.mark.xfail(reason="typing.Protocol instance check is annoying") def test_types(self, *args, **kwargs): super().test_types(*args, **kwargs) + + def test_storage_type(self, swh_storage): + assert isinstance(swh_storage, MaskingProxyStorage) diff --git a/swh/storage/tests/masking/test_proxy_patching.py b/swh/storage/tests/masking/test_proxy_patching.py new file mode 100644 index 000000000..0a7f569ee --- /dev/null +++ b/swh/storage/tests/masking/test_proxy_patching.py @@ -0,0 +1,330 @@ +# Copyright (C) 2024 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import attr + +from swh.core.api.classes import stream_results +from swh.model.model import Person +from swh.storage.proxies.masking.db import MaskingAdmin +from swh.storage.tests.storage_data import StorageData + + +def test_revision_get(swh_storage, masking_admin: MaskingAdmin): + revision = StorageData.revision + revision2 = StorageData.revision2 + revision3 = StorageData.revision4 # StorageData.rev3 has same author/committer as 2 + + assert ( + revision.author is not None + and revision.author.email is not None + and revision2.committer is not None + and revision2.committer.email is not None + ) # for mypy + + swh_storage.revision_add([revision, revision2, revision3]) + assert swh_storage.revision_get([revision.id, revision2.id, revision3.id]) == [ + revision, + revision2, + revision3, + ] + + display_name1 = b"New Author <new-author1@example.org>" + display_name2 = b"New Author <new-author2@example.org>" + masking_admin.set_display_name(revision.author.email, display_name1) + masking_admin.set_display_name(revision2.committer.email, display_name2) + + assert swh_storage.revision_get([revision.id, revision2.id, revision3.id]) == [ + attr.evolve(revision, author=Person.from_fullname(display_name1)), + attr.evolve(revision2, committer=Person.from_fullname(display_name2)), + revision3, + ] + + +def test_revision_get_none_author(swh_storage, masking_admin: MaskingAdmin): + revision = attr.evolve( + StorageData.revision, + author=None, + date=None, + committer=None, + committer_date=None, + ) + revision2 = StorageData.revision2 + assert revision2.author is not None and revision2.committer is not None # for mypy + revision2 = attr.evolve( + revision2, + author=attr.evolve(revision2.author, email=None), + committer=attr.evolve(revision2.committer, email=None), + ) + + swh_storage.revision_add([revision, revision2]) + assert swh_storage.revision_get([revision.id, revision2.id]) == [ + revision, + revision2, + ] + + display_name = b"New Author <new-author@example.org>" + masking_admin.set_display_name(b"old-author@example.org", display_name) + + assert swh_storage.revision_get([revision.id, revision2.id]) == [ + revision, + revision2, + ] + + +def test_revision_log(swh_storage, masking_admin: MaskingAdmin): + revision = StorageData.revision + revision2 = StorageData.revision2 + revision3 = StorageData.revision3 + revision4 = StorageData.revision4 + + assert ( + revision.author is not None + and revision.author.email is not None + and revision2.committer is not None + and revision2.committer.email is not None + ) # for mypy + + swh_storage.revision_add([revision, revision2, revision3, revision4]) + assert list(swh_storage.revision_log([revision3.id, revision4.id])) == [ + revision3.to_dict(), + revision.to_dict(), + revision2.to_dict(), + revision4.to_dict(), + ] + + display_name1 = b"New Author <new-author1@example.org>" + display_name2 = b"New Author <new-author2@example.org>" + masking_admin.set_display_name(revision.author.email, display_name1) + masking_admin.set_display_name(revision2.committer.email, display_name2) + + assert list(swh_storage.revision_log([revision3.id, revision4.id])) == [ + attr.evolve(revision3, committer=Person.from_fullname(display_name2)).to_dict(), + attr.evolve(revision, author=Person.from_fullname(display_name1)).to_dict(), + attr.evolve(revision2, committer=Person.from_fullname(display_name2)).to_dict(), + revision4.to_dict(), + ] + + +def test_revision_log_none_author(swh_storage, masking_admin: MaskingAdmin): + revision = attr.evolve( + StorageData.revision, + author=None, + date=None, + committer=None, + committer_date=None, + ) + revision2 = StorageData.revision2 + revision3 = StorageData.revision3 + assert revision2.author is not None and revision2.committer is not None # for mypy + revision2 = attr.evolve( + revision2, + author=attr.evolve(revision2.author, email=None), + committer=attr.evolve(revision2.committer, email=None), + ) + + swh_storage.revision_add([revision, revision2, revision3]) + assert list(swh_storage.revision_log([revision3.id])) == [ + revision3.to_dict(), + revision.to_dict(), + revision2.to_dict(), + ] + + display_name = b"New Author <new-author@example.org>" + masking_admin.set_display_name(b"old-author@example.org", display_name) + + assert list(swh_storage.revision_log([revision3.id])) == [ + revision3.to_dict(), + revision.to_dict(), + revision2.to_dict(), + ] + + +def test_revision_get_partition(swh_storage, masking_admin: MaskingAdmin): + revision = attr.evolve(StorageData.revision, metadata=None) + revision2 = StorageData.revision2 + revision3 = StorageData.revision4 # StorageData.rev3 has same author/committer as 2 + + assert ( + revision.author is not None + and revision.author.email is not None + and revision2.committer is not None + and revision2.committer.email is not None + ) # for mypy + + swh_storage.revision_add([revision, revision2, revision3]) + assert set( + stream_results( + swh_storage.revision_get_partition, partition_id=0, nb_partitions=1 + ) + ) == { + revision, + revision2, + revision3, + } + + display_name1 = b"New Author <new-author1@example.org>" + display_name2 = b"New Author <new-author2@example.org>" + masking_admin.set_display_name(revision.author.email, display_name1) + masking_admin.set_display_name(revision2.committer.email, display_name2) + + assert set( + stream_results( + swh_storage.revision_get_partition, partition_id=0, nb_partitions=1 + ) + ) == { + attr.evolve(revision, author=Person.from_fullname(display_name1)), + attr.evolve(revision2, committer=Person.from_fullname(display_name2)), + revision3, + } + + +def test_revision_get_partition_none_author(swh_storage, masking_admin: MaskingAdmin): + revision = attr.evolve( + StorageData.revision, + author=None, + date=None, + committer=None, + committer_date=None, + metadata=None, + ) + revision2 = StorageData.revision2 + assert revision2.author is not None and revision2.committer is not None # for mypy + revision2 = attr.evolve( + revision2, + author=attr.evolve(revision2.author, email=None), + committer=attr.evolve(revision2.committer, email=None), + ) + + swh_storage.revision_add([revision, revision2]) + assert set( + stream_results( + swh_storage.revision_get_partition, partition_id=0, nb_partitions=1 + ) + ) == { + revision, + revision2, + } + + display_name = b"New Author <new-author@example.org>" + masking_admin.set_display_name(b"old-author@example.org", display_name) + + assert set( + stream_results( + swh_storage.revision_get_partition, partition_id=0, nb_partitions=1 + ) + ) == { + revision, + revision2, + } + + +############ + + +def test_release_get(swh_storage, masking_admin: MaskingAdmin): + release = StorageData.release + release2 = StorageData.release2 + + assert release.author is not None and release.author.email is not None # for mypy + + swh_storage.release_add([release, release2]) + assert swh_storage.release_get([release.id, release2.id]) == [ + release, + release2, + ] + + display_name1 = b"New Author <new-author1@example.org>" + masking_admin.set_display_name(release.author.email, display_name1) + + assert swh_storage.release_get([release.id, release2.id]) == [ + attr.evolve(release, author=Person.from_fullname(display_name1)), + release2, + ] + + +def test_release_get_none_author(swh_storage, masking_admin: MaskingAdmin): + release = attr.evolve( + StorageData.release, + author=None, + date=None, + ) + release2 = StorageData.release2 + assert release2.author is not None # for mypy + release2 = attr.evolve( + release2, + author=attr.evolve(release2.author, email=None), + ) + + swh_storage.release_add([release, release2]) + assert swh_storage.release_get([release.id, release2.id]) == [ + release, + release2, + ] + + display_name = b"New Author <new-author@example.org>" + masking_admin.set_display_name(b"old-author@example.org", display_name) + + assert swh_storage.release_get([release.id, release2.id]) == [ + release, + release2, + ] + + +def test_release_get_partition(swh_storage, masking_admin: MaskingAdmin): + release = attr.evolve(StorageData.release, metadata=None) + release2 = StorageData.release2 + + assert release.author is not None and release.author.email is not None # for mypy + + swh_storage.release_add([release, release2]) + assert set( + stream_results( + swh_storage.release_get_partition, partition_id=0, nb_partitions=1 + ) + ) == { + release, + release2, + } + + display_name1 = b"New Author <new-author1@example.org>" + masking_admin.set_display_name(release.author.email, display_name1) + + assert set( + stream_results( + swh_storage.release_get_partition, partition_id=0, nb_partitions=1 + ) + ) == { + attr.evolve(release, author=Person.from_fullname(display_name1)), + release2, + } + + +def test_release_get_partition_none_author(swh_storage, masking_admin: MaskingAdmin): + release = attr.evolve( + StorageData.release, + author=None, + date=None, + metadata=None, + ) + + swh_storage.release_add([release]) + assert set( + stream_results( + swh_storage.release_get_partition, partition_id=0, nb_partitions=1 + ) + ) == { + release, + } + + display_name = b"New Author <new-author@example.org>" + masking_admin.set_display_name(b"old-author@example.org", display_name) + + assert set( + stream_results( + swh_storage.release_get_partition, partition_id=0, nb_partitions=1 + ) + ) == { + release, + } -- GitLab