Skip to content
Snippets Groups Projects
Verified Commit 17f04939 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

Open a new walk_and_compute_sha1_from_directory_2 api

This actually is supposed to replace
walk_and_compute_sha1_from_directory.
The data structure used here is better at handling updates.

(Code that actually got lost and rewritten - Tests are definitely lost though)
parent 1af7aedb
No related branches found
No related tags found
No related merge requests found
...@@ -338,7 +338,10 @@ def walk_and_compute_sha1_from_directory(rootdir, ...@@ -338,7 +338,10 @@ def walk_and_compute_sha1_from_directory(rootdir,
dir_ok_fn=default_validation_dir, dir_ok_fn=default_validation_dir,
with_root_tree=True, with_root_tree=True,
remove_empty_folder=False): remove_empty_folder=False):
"""Compute git sha1 from directory rootdir. """(Deprecated) TODO migrate the code to
walk_and_compute_sha1_from_directory_2.
Compute git sha1 from directory rootdir.
Args: Args:
- rootdir: Root directory from which beginning the git hash computation - rootdir: Root directory from which beginning the git hash computation
...@@ -417,8 +420,120 @@ def walk_and_compute_sha1_from_directory(rootdir, ...@@ -417,8 +420,120 @@ def walk_and_compute_sha1_from_directory(rootdir,
return ls_hashes return ls_hashes
def walk_and_compute_sha1_from_directory_2(rootdir,
dir_ok_fn=default_validation_dir,
remove_empty_folder=False):
"""Compute git sha1 from directory rootdir.
Args:
- rootdir: Root directory from which beginning the git hash
computation
- dir_ok_fn: Filter function to filter directory according to rules
defined in the function. By default, all folders are ok.
Example override: dir_ok_fn = lambda dirpath: b'svn' not in dirpath
Returns:
Dictionary of entries with keys absolute path name.
Path-name can be a file/link or directory.
The associated value is a dictionary with:
- checksums: the dictionary with the hashes for the link/file/dir
Those are list of dictionary with keys:
- 'perms'
- 'type'
- 'name'
- 'sha1_git'
- and specifically content: 'sha1', 'sha256', ...
- children: Only for a directory, the set of children paths
Note:
One special key is the / which indicates the upper root of
the directory (this is the revision's directory).
Raises:
Nothing
If something is raised, this is a programmatic error.
"""
def __get_dict_from_dirpath(_dict, path):
"""Retrieve the default associated value for key path.
"""
return _dict.get(path, dict(children=set(), checksums=None))
def __get_dict_from_filepath(_dict, path):
"""Retrieve the default associated value for key path.
"""
return _dict.get(path, dict(checksums=None))
ls_hashes = {}
all_links = set()
if rootdir.endswith(b'/'):
rootdir = rootdir.rstrip(b'/')
for dirpath, dirnames, filenames in __walk(
rootdir, dir_ok_fn, remove_empty_folder):
dir_entry = __get_dict_from_dirpath(ls_hashes, dirpath)
children = dir_entry['children']
links = (file
for file in filenames.union(dirnames)
if os.path.islink(file))
for linkpath in links:
all_links.add(linkpath)
m_hashes = compute_link_metadata(linkpath)
d = __get_dict_from_filepath(ls_hashes, linkpath)
d['checksums'] = m_hashes
ls_hashes[linkpath] = d
children.add(linkpath)
for filepath in (file for file in filenames if file not in all_links):
m_hashes = compute_blob_metadata(filepath)
d = __get_dict_from_filepath(ls_hashes, filepath)
d['checksums'] = m_hashes
ls_hashes[filepath] = d
children.add(filepath)
for fulldirname in (dir for dir in dirnames if dir not in all_links):
d_hashes = __get_dict_from_dirpath(ls_hashes, fulldirname)
tree_hash = _compute_tree_metadata(
fulldirname,
(ls_hashes[p]['checksums'] for p in d_hashes['children'])
)
d = __get_dict_from_dirpath(ls_hashes, fulldirname)
d['checksums'] = tree_hash
ls_hashes[fulldirname] = d
children.add(fulldirname)
dir_entry['children'] = children
ls_hashes[dirpath] = dir_entry
# compute the current directory hashes
d_hashes = __get_dict_from_dirpath(ls_hashes, rootdir)
root_hash = {
'sha1_git': _compute_directory_git_sha1(
(ls_hashes[p]['checksums'] for p in d_hashes['children'])
),
'path': rootdir,
'name': os.path.basename(rootdir),
'perms': GitPerm.TREE,
'type': GitType.TREE
}
d_hashes['checksums'] = root_hash
ls_hashes[rootdir] = d_hashes
return ls_hashes
def recompute_sha1_in_memory(root, deeper_rootdir, objects): def recompute_sha1_in_memory(root, deeper_rootdir, objects):
"""Recompute git sha1 from directory deeper_rootdir to root. """TODO: Use git.walk_and_compute_sha1_from_directory_2
Recompute git sha1 from directory deeper_rootdir to root.
This function relies exclusively on `objects` for hashes. It This function relies exclusively on `objects` for hashes. It
expects the deeper_rootdir and every key below that path to be expects the deeper_rootdir and every key below that path to be
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment