From cebe917a85d427355e837a69e3c28248a496d277 Mon Sep 17 00:00:00 2001 From: Franck Bret <franck.bret@octobus.net> Date: Tue, 13 Feb 2024 10:33:24 +0100 Subject: [PATCH] from_disk: Add optional progress callback Add an optional progress callback to the `from_disk` method. When given, it is called with the number of computed entries for each non-empty directory traversed. This is useful for the CLI, in particular to display progress information for SWH Scanner. --- swh/model/cli.py | 7 +++++-- swh/model/from_disk.py | 7 +++++++ swh/model/tests/test_from_disk.py | 11 +++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/swh/model/cli.py b/swh/model/cli.py index 7fd99ed0..9fd4c50f 100644 --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -5,7 +5,7 @@ import os import sys -from typing import Dict, Iterable, Optional +from typing import Callable, Dict, Iterable, Optional # WARNING: do not import unnecessary things here to keep cli startup time under # control @@ -74,6 +74,7 @@ def swhid_of_file_content(data) -> CoreSWHID: def model_of_dir( path: bytes, exclude_patterns: Optional[Iterable[bytes]] = None, + update_info: Optional[Callable[[int], None]] = None, ) -> Directory: from swh.model.from_disk import accept_all_paths, ignore_directories_patterns @@ -83,7 +84,9 @@ def model_of_dir( else accept_all_paths ) - return Directory.from_disk(path=path, path_filter=path_filter) + return Directory.from_disk( + path=path, path_filter=path_filter, progress_callback=update_info + ) def swhid_of_dir( diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py index 48d70161..26bb442b 100644 --- a/swh/model/from_disk.py +++ b/swh/model/from_disk.py @@ -453,6 +453,7 @@ class Directory(MerkleNode): Callable[[bytes, bytes, Optional[List[bytes]]], bool] ] = None, max_content_length: Optional[int] = None, + progress_callback: Optional[Callable[[int], None]] = None, ) -> "Directory": """Compute the Software Heritage objects for a given directory tree @@ -472,6 +473,8 @@ class Directory(MerkleNode): 
directory should be ignored. max_content_length (Optional[int]): if given, all contents larger than this will be skipped. + progress_callback (Optional function): if given, returns for each + non empty directories traversed the number of computed entries. """ top_path = path dirs: Dict[bytes, Directory] = {} @@ -504,6 +507,10 @@ class Directory(MerkleNode): dirs[root] = cls({"name": os.path.basename(root), "path": root}) dirs[root].update(entries) + if progress_callback is not None: + if len(entries) > 0: + progress_callback(len(entries)) + return dirs[top_path] def __init__(self, data=None): diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py index 946fd533..e1b455ac 100644 --- a/swh/model/tests/test_from_disk.py +++ b/swh/model/tests/test_from_disk.py @@ -914,6 +914,17 @@ class DirectoryToObjects(DataMixin, unittest.TestCase): b"foofile", ] + def test_directory_progress_callback(self): + total = [] + + def update_info(arg): + assert type(arg) is int + total.append(arg) + + Directory.from_disk(path=self.tmpdir_name, progress_callback=update_info) + # Corresponds to the deeper files and directories plus the four top level ones + assert total == [1, 1, 1, 1, 4] + @pytest.mark.fs class TarballTest(DataMixin, unittest.TestCase): -- GitLab