diff --git a/swh/model/cli.py b/swh/model/cli.py index 7fd99ed0a1c51407fae814786ff2deb6aed72537..9fd4c50f60905621c092c8677f40ae9ff8c368ce 100644 --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -5,7 +5,7 @@ import os import sys -from typing import Dict, Iterable, Optional +from typing import Callable, Dict, Iterable, Optional # WARNING: do not import unnecessary things here to keep cli startup time under # control @@ -74,6 +74,7 @@ def swhid_of_file_content(data) -> CoreSWHID: def model_of_dir( path: bytes, exclude_patterns: Optional[Iterable[bytes]] = None, + update_info: Optional[Callable[[int], None]] = None, ) -> Directory: from swh.model.from_disk import accept_all_paths, ignore_directories_patterns @@ -83,7 +84,9 @@ def model_of_dir( else accept_all_paths ) - return Directory.from_disk(path=path, path_filter=path_filter) + return Directory.from_disk( + path=path, path_filter=path_filter, progress_callback=update_info + ) def swhid_of_dir( diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py index 48d701612424083b2d70db9125d9dca723068152..26bb442b7527c1ba4e7a0de166ed99fca52f7e09 100644 --- a/swh/model/from_disk.py +++ b/swh/model/from_disk.py @@ -453,6 +453,7 @@ class Directory(MerkleNode): Callable[[bytes, bytes, Optional[List[bytes]]], bool] ] = None, max_content_length: Optional[int] = None, + progress_callback: Optional[Callable[[int], None]] = None, ) -> "Directory": """Compute the Software Heritage objects for a given directory tree @@ -472,6 +473,8 @@ class Directory(MerkleNode): directory should be ignored. max_content_length (Optional[int]): if given, all contents larger than this will be skipped. + progress_callback (Optional[Callable[[int], None]]): if given, called + for each non-empty directory traversed with its number of entries.
""" top_path = path dirs: Dict[bytes, Directory] = {} @@ -504,6 +507,10 @@ class Directory(MerkleNode): dirs[root] = cls({"name": os.path.basename(root), "path": root}) dirs[root].update(entries) + if progress_callback is not None: + if len(entries) > 0: + progress_callback(len(entries)) + return dirs[top_path] def __init__(self, data=None): diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py index 946fd533428a2d428794e1d99749de1c330e744c..e1b455acf9646e88ce1935bf9644cb81e39889d0 100644 --- a/swh/model/tests/test_from_disk.py +++ b/swh/model/tests/test_from_disk.py @@ -914,6 +914,17 @@ class DirectoryToObjects(DataMixin, unittest.TestCase): b"foofile", ] + def test_directory_progress_callback(self): + total = [] + + def update_info(arg): + assert type(arg) is int + total.append(arg) + + Directory.from_disk(path=self.tmpdir_name, progress_callback=update_info) + # Corresponds to the deeper files and directories plus the four top level ones + assert total == [1, 1, 1, 1, 4] + @pytest.mark.fs class TarballTest(DataMixin, unittest.TestCase):