Skip to content
Snippets Groups Projects
Commit ece18350 authored by Pierre-Yves David's avatar Pierre-Yves David
Browse files

from-disk-exclude: handle top_path with a final slash

Before this patch giving `from_disk` and directory path with final '/'s
would confuses the exclusion code as it would assume it need to exclude
one extra character to get the basename. This would swallow the first
char of the directory to exclude, leading to a righful crash.

This would affect directory excluded in the first directory only as sub
directory would be called without a '/'.

We fix the path at the top of the function as trailing '/'s could
confuse other code too in the future.
parent 8bb57608
No related branches found
No related tags found
No related merge requests found
......@@ -467,7 +467,15 @@ class Directory(MerkleNode):
progress_callback (Optional function): if given, returns for each
non empty directories traversed the number of computed entries.
"""
# the top might have been specified with a final slash. This will
# confuse various code.
#
# We should prevent '/' as is however
if 1 < len(path) and path[-1:] == b"/":
path = path[0:1] + path[1:].rstrip(b"/")
assert len(path) <= 1 or path[-1:] != b"/"
top_path = path
top_path_prefix_size = len(top_path) + 1
dirs: Dict[bytes, Directory] = {}
dirs[top_path] = cls({"name": os.path.basename(top_path), "path": top_path})
......@@ -507,7 +515,7 @@ class Directory(MerkleNode):
top_dir = dirs[top_path]
for path in reversed(filtered):
path = path[len(top_path) + 1 :]
path = path[top_path_prefix_size:]
del top_dir[path]
# a bit sad but now we have to traverse the gathered tree structure to
# filter it again (e.g. for the ignore_empty_directory filter to work
......
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -950,6 +949,10 @@ class DirectoryToObjects(DataMixin, unittest.TestCase):
expected_content_count=len(self.contents) + 1,
)
def test_directory_to_objects_ignore_name_with_slash(self):
self.tmpdir_name = self.tmpdir_name + b"/"
self.test_directory_to_objects_ignore_name()
def test_directory_to_objects_ignore_name_case(self):
directory = Directory.from_disk(
path=self.tmpdir_name,
......@@ -1042,7 +1045,10 @@ class DirectoryToObjects(DataMixin, unittest.TestCase):
# Corresponds to the deeper files and directories plus the four top level ones
assert total == [4, 1, 1, 1, 1]
def test_exclude(self):
def test_exclude_trailing(self):
self.test_exclude(trailing_slash=True)
def test_exclude(self, trailing_slash=False):
"""exclude patterns"""
with tempfile.TemporaryDirectory() as dirname:
dirname = os.fsencode(dirname)
......@@ -1063,7 +1069,10 @@ class DirectoryToObjects(DataMixin, unittest.TestCase):
)
# no filter
directory = Directory.from_disk(path=dirname)
dir_path = dirname
if trailing_slash:
dir_path += b"/"
directory = Directory.from_disk(path=dir_path)
assert set(directory.keys()) == {
b"baz",
b"foo",
......@@ -1088,7 +1097,7 @@ class DirectoryToObjects(DataMixin, unittest.TestCase):
exclude_patterns = [b"excluded_*"]
path_filter = ignore_directories_patterns(dirname, exclude_patterns)
directory_f = Directory.from_disk(path=dirname, path_filter=path_filter)
directory_f = Directory.from_disk(path=dir_path, path_filter=path_filter)
assert set(directory_f.keys()) == {b"baz", b"foo", b"foofile", b"file"}
# XXX should foo/excluded_dir and foo/excluded_dir2 be excluded as
# well? Currently they are not
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment