Skip to content
Snippets Groups Projects
Commit 5ca1bda3 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

Allow filtering out unwanted directories when computing the git hash

parent 2a0107ee
No related branches found
No related tags found
No related merge requests found
......@@ -179,9 +179,17 @@ def compute_tree_metadata(dirname, ls_hashes):
}
def walk_and_compute_sha1_from_directory(rootdir):
def walk_and_compute_sha1_from_directory(rootdir,
dir_ok_fn=lambda dirpath: True):
"""Compute git sha1 from directory rootdir.
Args:
- rootdir: Root directory from which beginning the git hash computation
- dir_ok_fn: Filter function to filter directory according to rules
defined in the function. By default, all folders are ok.
Example override: dir_ok_fn = lambda dirpath: b'svn' not in dirpath
Returns:
Dictionary of entries with keys <path-name> and as values a list of
directory entries.
......@@ -204,21 +212,30 @@ def walk_and_compute_sha1_from_directory(rootdir):
ls_hashes = {}
all_links = set()
for dirpath, dirnames, filenames in os.walk(rootdir, topdown=False):
def filtfn(dirpath, dirnames):
return list(filter(lambda dirname: dir_ok_fn(os.path.join(dirpath,
dirname)),
dirnames))
gen_dir = ((dp, filtfn(dp, dns), fns) for (dp, dns, fns)
in os.walk(rootdir, topdown=False)
if dir_ok_fn(dp))
for dirpath, dirnames, filenames in gen_dir:
hashes = []
links = [os.path.join(dirpath, file)
links = (os.path.join(dirpath, file)
for file in (filenames+dirnames)
if os.path.islink(os.path.join(dirpath, file))]
if os.path.islink(os.path.join(dirpath, file)))
for linkpath in links:
all_links.add(linkpath)
m_hashes = compute_link_metadata(linkpath)
hashes.append(m_hashes)
only_files = [os.path.join(dirpath, file)
only_files = (os.path.join(dirpath, file)
for file in filenames
if os.path.join(dirpath, file) not in all_links]
if os.path.join(dirpath, file) not in all_links)
for filepath in only_files:
m_hashes = compute_blob_metadata(filepath)
hashes.append(m_hashes)
......@@ -226,10 +243,10 @@ def walk_and_compute_sha1_from_directory(rootdir):
ls_hashes[dirpath] = hashes
dir_hashes = []
subdirs = [os.path.join(dirpath, dir)
subdirs = (os.path.join(dirpath, dir)
for dir in dirnames
if os.path.join(dirpath, dir)
not in all_links]
not in all_links)
for fulldirname in subdirs:
tree_hash = compute_tree_metadata(fulldirname, ls_hashes)
dir_hashes.append(tree_hash)
......
......@@ -3,7 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import tempfile
import unittest
import subprocess
from nose.tools import istest
......@@ -131,3 +134,79 @@ blah
# then
self.assertEqual(checksum, self.checksums['tag_sha1_git'])
class GitHashArborescenceTree(unittest.TestCase):
    """Check directory hashing with and without a directory filter.

    The fixture is the ``sample-folder.tgz`` archive from the
    ``swh-storage-testdata`` checkout, unpacked once per class into a
    temporary directory. All paths are handled as bytes.
    """

    @classmethod
    def setUpClass(cls):
        """Unpack the sample folder archive into a fresh temporary directory."""
        super().setUpClass()

        cls.tmp_root_path = tempfile.mkdtemp().encode('utf-8')

        start_path = os.path.dirname(__file__).encode('utf-8')
        sample_folder_archive = os.path.join(start_path,
                                             b'../../../..',
                                             b'swh-storage-testdata',
                                             b'dir-folders',
                                             b'sample-folder.tgz')

        cls.root_path = os.path.join(cls.tmp_root_path, b'sample-folder')

        # uncompress the sample folder
        subprocess.check_output(
            ['tar', 'xvf', sample_folder_archive, '-C', cls.tmp_root_path])

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory created by ``setUpClass``."""
        # Local import: the file-level import block is not touched here.
        import shutil

        shutil.rmtree(cls.tmp_root_path)
        super().tearDownClass()

    @istest
    def walk_and_compute_sha1_from_directory(self):
        """Filtering a directory only changes the hashes of its ancestors."""
        # make a temporary arborescence tree to hash without ignoring anything
        # same as previous behavior
        walk0 = git.walk_and_compute_sha1_from_directory(self.tmp_root_path)

        path_excluded = os.path.join(self.tmp_root_path,
                                     b'sample-folder',
                                     b'foo')
        self.assertIn(path_excluded, walk0)  # it is not excluded here

        # make the same temporary arborescence tree to hash with ignoring one
        # folder foo
        walk1 = git.walk_and_compute_sha1_from_directory(
            self.tmp_root_path,
            dir_ok_fn=lambda dirpath: b'sample-folder/foo' not in dirpath)
        self.assertNotIn(path_excluded, walk1)

        # remove the keys that can't be the same (due to hash definition)
        # Those are the top level folders
        keys_diff = [self.tmp_root_path,
                     os.path.join(self.tmp_root_path, b'sample-folder'),
                     git.ROOT_TREE_KEY]

        for k in keys_diff:
            self.assertNotEqual(walk0[k], walk1[k])

        # The remaining keys (bottom path) should have exactly the same hashes
        # as before
        keys = set(walk1) - set(keys_diff)
        actual_walk1 = {}
        for k in keys:
            self.assertEqual(walk0[k], walk1[k])
            actual_walk1[k] = walk1[k]

        expected_checksums = {
            os.path.join(self.tmp_root_path, b'sample-folder/empty-folder'): [],  # noqa
            os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo'): [{  # noqa
                'type': git.GitType.BLOB,  # noqa
                'sha256': b'=\xb5\xae\x16\x80U\xbc\xd9:M\x08(]\xc9\x9f\xfe\xe2\x883\x03\xb2?\xac^\xab\x85\x02s\xa8\xeaUF',  # noqa
                'name': b'another-quote.org',  # noqa
                'path': os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo/another-quote.org'),  # noqa
                'perms': git.GitPerm.BLOB,  # noqa
                'sha1': b'\x90\xa6\x13\x8b\xa5\x99\x15&\x1e\x17\x99H8j\xa1\xcc*\xa9"\n',  # noqa
                'sha1_git': b'\x136\x93\xb1%\xba\xd2\xb4\xac1\x855\xb8I\x01\xeb\xb1\xf6\xb68'}],  # noqa
            os.path.join(self.tmp_root_path, b'sample-folder/bar'): [{  # noqa
                'type': git.GitType.TREE,  # noqa
                'perms': git.GitPerm.TREE,  # noqa
                'name': b'barfoo',  # noqa
                'path': os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo'),  # noqa
                'sha1_git': b'\xc3\x02\x0fk\xf15\xa3\x8cm\xf3\xaf\xeb_\xb3\x822\xc5\xe0p\x87'}]}  # noqa

        self.assertEqual(actual_walk1, expected_checksums)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment