From 8b65e42f5adbeef9fd1ef8e28a908c82ee77204f Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <vlorentz@softwareheritage.org> Date: Tue, 15 Mar 2022 16:56:40 +0100 Subject: [PATCH] backfill: Make integer_ranges() work on str args + add typing to RANGE_GENERATORS Without the type annotation, mypy errors with 'Cannot call function of unknown type' when called from a type-checked function. --- swh/storage/backfill.py | 49 +++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py index 54908b48e..4b0447961 100644 --- a/swh/storage/backfill.py +++ b/swh/storage/backfill.py @@ -16,7 +16,7 @@ the journal. """ import logging -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Dict, Iterable, Iterator, Optional, Tuple, Union from swh.core.db import BaseDb from swh.model.model import ( @@ -320,15 +320,17 @@ def object_to_offset(object_id, numbits): return int.from_bytes(truncated_id_bytes, byteorder="big") >> shift_bits -def byte_ranges(numbits, start_object=None, end_object=None): +def byte_ranges( + numbits: int, start_object: Optional[str] = None, end_object: Optional[str] = None +) -> Iterator[Tuple[Optional[bytes], Optional[bytes]]]: """Generate start/end pairs of bytes spanning numbits bits and constrained by optional start_object and end_object. Args: - numbits (int): Number of bits in which we divide input space - start_object (str): Hex object id contained in the first range + numbits: Number of bits in which we divide input space + start_object: Hex object id contained in the first range returned - end_object (str): Hex object id contained in the last range + end_object: Hex object id contained in the last range returned Yields: @@ -361,7 +363,9 @@ def byte_ranges(numbits, start_object=None, end_object=None): yield to_bytes(start), to_bytes(end) -def raw_extrinsic_metadata_target_ranges(start_object=None, end_object=None): +def raw_extrinsic_metadata_target_ranges( + start_object: Optional[str] = None, end_object: Optional[str] = None +) -> Iterator[Tuple[Optional[str], Optional[str]]]: """Generate ranges of values for the `target` attribute of `raw_extrinsic_metadata` objects. @@ -436,17 +440,26 @@ def raw_extrinsic_metadata_target_ranges(start_object=None, end_object=None): yield start_swhid, end_object -def integer_ranges(start, end, block_size=1000): - for start in range(start, end, block_size): - if start == 0: +def integer_ranges( + start: str, end: str, block_size: int = 1000 +) -> Iterator[Tuple[Optional[int], Optional[int]]]: + for range_start in range(int(start), int(end), block_size): + if range_start == 0: yield None, block_size - elif start + block_size > end: - yield start, end + elif range_start + block_size > int(end): + yield range_start, int(end) else: - yield start, start + block_size + yield range_start, range_start + block_size -RANGE_GENERATORS = { +RANGE_GENERATORS: Dict[ + str, + Union[ + Callable[[str, str], Iterable[Tuple[Optional[str], Optional[str]]]], + Callable[[str, str], Iterable[Tuple[Optional[bytes], Optional[bytes]]]], + Callable[[str, str], Iterable[Tuple[Optional[int], Optional[int]]]], + ], +] = { "content": lambda start, end: byte_ranges(24, start, end), "skipped_content": lambda start, end: [(None, None)], "directory": lambda start, end: byte_ranges(24, start, end), @@ -608,14 +621,8 @@ class JournalBackfiller: ) if object_type in ["origin", "origin_visit", "origin_visit_status"]: - if start_object: - start_object = int(start_object) - else: - start_object = 0 - if end_object: - end_object = int(end_object) - else: - end_object = 100 * 1000 * 1000 # hard-coded limit + start_object = start_object or "0" + end_object = end_object or str(100_000_000) # hard-coded limit return start_object, end_object -- GitLab