Commit b7c163b8 authored by Antoine R. Dumont

Update upstream source from tag 'debian/upstream/0.0.11'

Update to upstream version '0.0.11'
with Debian dir a108f7c4dbea9105ec578d819bf6d8c357e6501c
parents 291b3459 b682f27f
 Metadata-Version: 1.0
 Name: swh.model
-Version: 0.0.10
+Version: 0.0.11
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
......
 Metadata-Version: 1.0
 Name: swh.model
-Version: 0.0.10
+Version: 0.0.11
 Summary: Software Heritage data model
 Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
 Author: Software Heritage developers
......
@@ -339,7 +339,7 @@ def walk_and_compute_sha1_from_directory(rootdir,
                                          with_root_tree=True,
                                          remove_empty_folder=False):
     """(Deprecated) TODO migrate the code to
-    walk_and_compute_sha1_from_directory_2.
+    compute_hashes_from_directory.
 
     Compute git sha1 from directory rootdir.
@@ -420,9 +420,9 @@ def walk_and_compute_sha1_from_directory(rootdir,
     return ls_hashes
 
 
-def walk_and_compute_sha1_from_directory_2(rootdir,
-                                           dir_ok_fn=default_validation_dir,
-                                           remove_empty_folder=False):
+def compute_hashes_from_directory(rootdir,
+                                  dir_ok_fn=default_validation_dir,
+                                  remove_empty_folder=False):
     """Compute git sha1 from directory rootdir.
 
     Args:
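
For orientation, a minimal sketch (not part of this commit) of how the renamed walker might be invoked; the module path and the shape of the returned mapping are inferred from the hunks on this page, while the directory argument is a purely hypothetical example:

    # Sketch only: assumes swh.model.git exposes compute_hashes_from_directory
    # with the signature shown in the hunk above (bytes rootdir, optional
    # dir_ok_fn and remove_empty_folder).
    from swh.model import git

    # Hypothetical local checkout to hash; any real directory path would do.
    objects = git.compute_hashes_from_directory(b'/tmp/some-checkout')

    # The mapping is keyed by path; each value carries the computed hashes
    # under 'checksums', as the objects_per_type code below relies on.
    for path, obj in objects.items():
        print(path, obj.get('checksums'))
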
@@ -530,240 +530,29 @@ def walk_and_compute_sha1_from_directory_2(rootdir,
     return ls_hashes
 
 
-def recompute_sha1_in_memory(root, deeper_rootdir, objects):
-    """TODO: Use git.walk_and_compute_sha1_from_directory_2
-
-    Recompute git sha1 from directory deeper_rootdir to root.
-
-    This function relies exclusively on `objects` for hashes. It
-    expects the deeper_rootdir and every key below that path to be
-    already updated.
-
-    Args:
-        - root: Upper root directory (so same as
-          objects[ROOT_TREE_KEY][0]['path'])
-        - deeper_rootdir: Upper root directory from which the git hash
-          computation has alredy been updated.
-        - objects: objects dictionary as per returned by
-          `walk_and_compute_sha1_from_directory`
-
-    Returns:
-        Dictionary of entries with keys <path-name> and as values a list of
-        directory entries.
-        Those are list of dictionary with keys:
-          - 'perms'
-          - 'type'
-          - 'name'
-          - 'sha1_git'
-          - and specifically content: 'sha1', 'sha256', ...
-
-    Note:
-        One special key is ROOT_TREE_KEY to indicate the upper root of the
-        directory (this is the revision's target directory).
-
-    Raises:
-        Nothing
-        If something is raised, this is a programmatic error.
-
-    """
-    # list of paths to update from bottom to top
-    upper_root = os.path.dirname(root)
-    rootdir = os.path.dirname(deeper_rootdir)
-    while rootdir != upper_root:
-        files = objects[rootdir]
-        ls_hashes = []
-        for hashfile in files:
-            fulldirname = hashfile['path']
-            if hashfile['type'] == GitType.TREE:
-                tree_hash = compute_tree_metadata(fulldirname, objects)
-                ls_hashes.append(tree_hash)
-            else:
-                ls_hashes.append(hashfile)
-
-        objects[rootdir] = ls_hashes
-
-        parent = os.path.dirname(rootdir)
-        rootdir = parent
-
-    # update root
-    root_tree_hash = compute_directory_git_sha1(root, objects)
-    objects[ROOT_TREE_KEY][0]['sha1_git'] = root_tree_hash
-    return objects
-
-
-def commonpath(paths):
-    """Given a sequence of path names, returns the longest common sub-path.
-
-    Copied from Python3.5
-
-    """
-    if not paths:
-        raise ValueError('commonpath() arg is an empty sequence')
-    if isinstance(paths[0], bytes):
-        sep = b'/'
-        curdir = b'.'
-    else:
-        sep = '/'
-        curdir = '.'
-
-    try:
-        split_paths = [path.split(sep) for path in paths]
-        try:
-            isabs, = set(p[:1] == sep for p in paths)
-        except ValueError:
-            raise ValueError("Can't mix absolute and relative paths")
-
-        split_paths = [
-            [c for c in s if c and c != curdir] for s in split_paths]
-        s1 = min(split_paths)
-        s2 = max(split_paths)
-        common = s1
-        for i, c in enumerate(s1):
-            if c != s2[i]:
-                common = s1[:i]
-                break
-
-        prefix = sep if isabs else sep[:0]
-        return prefix + sep.join(common)
-    except (TypeError, AttributeError):
-        raise
-
-
-def __remove_paths_from_objects(objects, rootpaths,
-                                dir_ok_fn=default_validation_dir):
-    """Given top paths to remove, remove all paths and descendants from
-    objects.
-
-    Args:
-        objects: The dictionary of paths to clean up.
-        rootpaths: The rootpaths to remove from objects.
-        - dir_ok_fn: Validation function on folder/file names.
-          Default to accept all.
-
-    Returns:
-        Objects dictionary without the rootpaths and their descendants.
-
-    """
-    dirpaths_to_clean = set()
-    for path in rootpaths:
-        path_list = objects.pop(path, None)
-        if path_list:  # need to remove the children directories too
-            for child in path_list:
-                if child['type'] == GitType.TREE:
-                    dirpaths_to_clean.add(child['path'])
-
-        parent = os.path.dirname(path)
-        # Is the parent still ok? (e.g. not an empty dir for example)
-        parent_check = dir_ok_fn(parent)
-        if not parent_check and parent not in dirpaths_to_clean:
-            dirpaths_to_clean.add(parent)
-        else:
-            # we need to pop the reference to path in the parent list
-            if objects.get(parent):
-                objects[parent] = filter(
-                    lambda p: p != path,
-                    objects.get(parent, []))
-
-    if dirpaths_to_clean:
-        objects = __remove_paths_from_objects(objects,
-                                              dirpaths_to_clean,
-                                              dir_ok_fn)
-
-    return objects
-
-
-def update_checksums_from(changed_paths, objects,
-                          dir_ok_fn=default_validation_dir,
-                          remove_empty_folder=False):
-    """Given a list of changed paths, recompute the checksums only where
-    needed.
-
-    Args:
-        changed_paths: Dictionary list representing path changes.
-            A dictionary has the form:
-            - path: the full path to the file Added, Modified or Deleted
-            - action: A, M or D
-        objects: dictionary returned by `walk_and_compute_sha1_from_directory`.
-        - dir_ok_fn: Validation function on folder/file names.
-          Default to accept all.
-
-    Returns:
-        Dictionary returned by `walk_and_compute_sha1_from_directory`
-        updated (mutated) according to latest filesystem modifications.
-
-    """
-    root = objects[ROOT_TREE_KEY][0]['path']
-    if root.endswith(b'/'):
-        root = root.rstrip(b'/')
-
-    paths = set()  # contain the list of impacted paths (A, D, M)
-    paths_to_remove = set()  # will contain the list of deletion paths (only D)
-    # a first round-trip to ensure we don't need to...
-    for changed_path in changed_paths:
-        path = changed_path['path']
-
-        parent = os.path.dirname(path)
-        if parent == root:  # ... recompute everything anyway
-            return walk_and_compute_sha1_from_directory(
-                root,
-                dir_ok_fn=dir_ok_fn,
-                remove_empty_folder=remove_empty_folder)
-
-        if changed_path['action'] == 'D':  # (D)elete
-            paths_to_remove.add(path)
-
-        paths.add(parent)
-
-    # no modification on paths (paths also contain deletion paths if any)
-    if not paths:
-        return objects
-
-    rootdir = commonpath(list(paths))
-
-    if paths_to_remove:
-        # Now we can remove the deleted directories from objects dictionary
-        objects = __remove_paths_from_objects(objects,
-                                              paths_to_remove,
-                                              dir_ok_fn)
-
-    # Recompute from disk the checksums from impacted common ancestor
-    # rootdir changes.
-    while not objects.get(rootdir, None):
-        # it could happened that the path is not found.
-        # In the case of an ignored folder for example.
-        # So we'll find the next existing parent
-        rootdir = os.path.dirname(rootdir)
-
-        if rootdir == root:  # fallback, if we hit root, walk
-                             # everything anyway
-            return walk_and_compute_sha1_from_directory(
-                root,
-                dir_ok_fn=dir_ok_fn,
-                remove_empty_folder=remove_empty_folder)
-
-    hashes = walk_and_compute_sha1_from_directory(
-        rootdir,
-        dir_ok_fn=dir_ok_fn,
-        with_root_tree=False,
-        remove_empty_folder=remove_empty_folder)
-
-    # Then update the original objects with new
-    # checksums for the arborescence tree below rootdir
-    objects.update(hashes)
-
-    # Recompute hashes in memory from rootdir to root
-    return recompute_sha1_in_memory(root, rootdir, objects)
+def children_hashes(children, objects):
+    """Given a collection of children path, yield the corresponding
+    hashes.
+
+    Args:
+        objects: objects hash as returned by git.compute_hashes_from_directory.
+        children: collection of bytes path
+
+    Yields:
+        Dictionary hashes
+
+    """
+    for p in children:
+        c = objects.get(p)
+        if c:
+            h = c.get('checksums')
+            if h:
+                yield h
 
 
 def objects_per_type(filter_type, objects_per_path):
     """Given an object dictionary returned by
-    `swh.model.git.walk_and_compute_sha1_from_directory_2`, yields
+    `swh.model.git.compute_hashes_from_directory`, yields
     corresponding element type's hashes
 
     Args:
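
As a quick illustration of the helper added above, a hedged sketch on hand-built data (the paths and checksum fields are made up; only the lookup-and-skip behaviour comes from the code in this commit):

    # Sketch only: exercise children_hashes on a hypothetical objects mapping.
    from swh.model.git import children_hashes

    objects = {
        b'/repo/a': {'checksums': {'name': b'a', 'sha1_git': '...'}},
        b'/repo/b': {},  # no checksums recorded yet -> silently skipped
    }

    # Yields only the checksum dicts that are actually present;
    # unknown paths (here b'/repo/missing') are skipped as well.
    print(list(children_hashes([b'/repo/a', b'/repo/b', b'/repo/missing'],
                               objects)))
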
@@ -774,21 +563,13 @@ def objects_per_type(filter_type, objects_per_path):
         Elements of type filter_type's hashes
 
     """
-    def __children_hash(objects, children):
-        for p in children:
-            c = objects.get(p, None)
-            if c:
-                h = c.get('checksums', None)
-                if h:
-                    yield h
-
     for path, obj in objects_per_path.items():
         o = obj['checksums']
         if o['type'] == filter_type:
             if 'children' in obj:  # for trees
                 if obj['children']:
-                    o['children'] = __children_hash(objects_per_path,
-                                                    obj['children'])
+                    o['children'] = children_hashes(obj['children'],
+                                                    objects_per_path)
                 else:
                     o['children'] = []
 
             yield o
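
Putting the pieces together, a hedged end-to-end sketch of how objects_per_type is typically driven after the rename; GitType.TREE and the 'name' checksum field are assumptions taken from the surrounding code, and the directory path is hypothetical:

    # Sketch only: walk a directory, then keep just the tree objects.
    from swh.model import git
    from swh.model.git import GitType  # assumed importable, as used above

    objects = git.compute_hashes_from_directory(b'/tmp/some-checkout')

    for tree in git.objects_per_type(GitType.TREE, objects):
        # 'children' is now a children_hashes generator ([] for empty trees).
        print(tree['name'], list(tree['children']))
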
-v0.0.10-0-g87fcced
+v0.0.11-0-g1a2b969
\ No newline at end of file