From 46fdf4c6e37482b987328dbdf8f6274b7bad2b7e Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <vlorentz@softwareheritage.org> Date: Wed, 15 Mar 2023 16:04:24 +0100 Subject: [PATCH] storage_checker: Add more complete typing --- swh/scrubber/storage_checker.py | 50 ++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/swh/scrubber/storage_checker.py b/swh/scrubber/storage_checker.py index 3d9beb6..b98bc4b 100644 --- a/swh/scrubber/storage_checker.py +++ b/swh/scrubber/storage_checker.py @@ -11,7 +11,7 @@ import dataclasses import datetime import json import logging -from typing import Iterable, Optional, Tuple, Union +from typing import Any, Iterable, NoReturn, Optional, Tuple, Union import psycopg2 import tenacity @@ -31,7 +31,7 @@ from swh.model.model import ( from swh.storage.algos.directory import directory_get_many from swh.storage.algos.snapshot import snapshot_get_all_branches from swh.storage.cassandra.storage import CassandraStorage -from swh.storage.interface import StorageInterface +from swh.storage.interface import PagedResult, StorageInterface from swh.storage.postgresql.storage import Storage as PostgresqlStorage from .db import Datastore, ScrubberDb @@ -41,6 +41,16 @@ logger = logging.getLogger(__name__) ScrubbableObject = Union[Revision, Release, Snapshot, Directory, Content] +def assert_never(value: NoReturn, msg) -> NoReturn: + """mypy makes sure this function is never called, through exhaustive checking + of ``value`` in the parent function. + + See https://mypy.readthedocs.io/en/latest/literal_types.html#exhaustive-checks + for details. + """ + assert False, msg + + @contextlib.contextmanager def postgresql_storage_db(storage): db = storage.get_db() @@ -204,27 +214,41 @@ class StorageChecker: ) -> None: page_token = None while True: - if object_type in (swhids.ObjectType.RELEASE, swhids.ObjectType.REVISION): - method = getattr(self.storage, f"{self.object_type}_get_partition") - page = method(partition_id, self.nb_partitions, page_token=page_token) + page: PagedResult[Any] + objects: Iterable[ScrubbableObject] + if object_type is swhids.ObjectType.REVISION: + page = self.storage.revision_get_partition( + partition_id, self.nb_partitions, page_token=page_token + ) + objects = page.results + elif object_type is swhids.ObjectType.RELEASE: + page = self.storage.release_get_partition( + partition_id, self.nb_partitions, page_token=page_token + ) objects = page.results - elif object_type == swhids.ObjectType.DIRECTORY: + elif object_type is swhids.ObjectType.DIRECTORY: page = self.storage.directory_get_id_partition( partition_id, self.nb_partitions, page_token=page_token ) directory_ids = page.results - objects = list(directory_get_many(self.storage, directory_ids)) - elif object_type == swhids.ObjectType.SNAPSHOT: + objects = [] + for dir_ in directory_get_many(self.storage, directory_ids): + assert dir_ is not None, directory_ids + objects.append(dir_) + elif object_type is swhids.ObjectType.SNAPSHOT: page = self.storage.snapshot_get_id_partition( partition_id, self.nb_partitions, page_token=page_token ) snapshot_ids = page.results - objects = [ - snapshot_get_all_branches(self.storage, snapshot_id) - for snapshot_id in snapshot_ids - ] + objects = [] + for snapshot_id in snapshot_ids: + snp = snapshot_get_all_branches(self.storage, snapshot_id) + assert snp is not None + objects.append(snp) + elif object_type is swhids.ObjectType.CONTENT: + assert False, "storage_checker does not support content objects yet" else: - assert False, f"Unexpected object type: {object_type}" + assert_never(object_type, f"Unexpected object type: {object_type}") with self.statsd().timed( "batch_duration_seconds", tags={"operation": "check_hashes"} -- GitLab