Skip to content
Snippets Groups Projects
Commit cec378f0 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

Update upstream source from tag 'debian/upstream/0.0.10'

Update to upstream version '0.0.10'
with Debian dir f15cce5a06b2f6487de49dd32123ca52507d8d3d
parents 93bca44b 70b68eff
No related branches found
No related tags found
No related merge requests found
Metadata-Version: 1.0
Name: swh.model
Version: 0.0.9
Version: 0.0.10
Summary: Software Heritage data model
Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
Author: Software Heritage developers
......
Metadata-Version: 1.0
Name: swh.model
Version: 0.0.9
Version: 0.0.10
Summary: Software Heritage data model
Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
Author: Software Heritage developers
......
......@@ -31,11 +31,10 @@ class GitPerm(Enum):
LINK = b'120000'
def compute_directory_git_sha1(dirpath, hashes):
"""Compute a directory git sha1 for a dirpath.
def _compute_directory_git_sha1(hashes):
"""Compute a directory git sha1 from hashes.
Args:
dirpath: the directory's absolute path
hashes: list of tree entries with keys:
- sha1_git: the tree entry's sha1
- name: file or subdir's name
......@@ -57,12 +56,32 @@ def compute_directory_git_sha1(dirpath, hashes):
'target': entry['sha1_git'],
'type': 'dir' if entry['perms'] == GitPerm.TREE else 'file',
}
for entry in hashes[dirpath]
for entry in hashes
]
}
return hashutil.hash_to_bytes(identifiers.directory_identifier(directory))
def compute_directory_git_sha1(dirpath, hashes):
    """Compute the git sha1 of the directory located at dirpath.

    Args:
        dirpath: the directory's absolute path, used as a key into hashes
        hashes: mapping from a directory path to the list of that
        directory's tree entries, each entry having the keys:
            - sha1_git: the tree entry's sha1
            - name: file or subdir's name
            - perms: the tree entry's git permissions

    Returns:
        the binary sha1 of the directory's identifier

    Assumes:
        dirpath exists in hashes (the lookup raises KeyError otherwise).

    """
    entries = hashes[dirpath]
    return _compute_directory_git_sha1(entries)
def compute_revision_sha1_git(revision):
"""Compute a revision sha1 git from its dict representation.
......@@ -162,11 +181,15 @@ def compute_blob_metadata(filepath):
return blob_metadata
def compute_tree_metadata(dirname, ls_hashes):
def _compute_tree_metadata(dirname, hashes):
"""Given a dirname, compute the git metadata.
Args:
dirname: absolute pathname of the directory.
hashes: list of tree dirname's entries with keys:
- sha1_git: the tree entry's sha1
- name: file or subdir's name
- perms: the tree entry's git permissions
Returns:
Dictionary of values:
......@@ -178,7 +201,7 @@ def compute_tree_metadata(dirname, ls_hashes):
"""
return {
'sha1_git': compute_directory_git_sha1(dirname, ls_hashes),
'sha1_git': _compute_directory_git_sha1(hashes),
'name': os.path.basename(dirname),
'perms': GitPerm.TREE,
'type': GitType.TREE,
......@@ -186,6 +209,25 @@ def compute_tree_metadata(dirname, ls_hashes):
}
def compute_tree_metadata(dirname, ls_hashes):
    """Compute the git metadata of the directory dirname.

    Args:
        dirname: absolute pathname of the directory, used as a key
        into ls_hashes.
        ls_hashes: dictionary mapping a path to the list of hashes of
        that path's entries.

    Returns:
        Dictionary of values:
            - sha1_git: tree's sha1 git
            - name: basename of the directory
            - perms: git permission for directory
            - type: git type for directory
            - path: absolute path to directory on filesystem

    Assumes:
        dirname exists in ls_hashes (the lookup raises KeyError otherwise).

    """
    entries = ls_hashes[dirname]
    return _compute_tree_metadata(dirname, entries)
def default_validation_dir(dirpath):
"""Default validation function.
This is the equivalent of the identity function.
......@@ -296,7 +338,10 @@ def walk_and_compute_sha1_from_directory(rootdir,
dir_ok_fn=default_validation_dir,
with_root_tree=True,
remove_empty_folder=False):
"""Compute git sha1 from directory rootdir.
"""(Deprecated) TODO migrate the code to
walk_and_compute_sha1_from_directory_2.
Compute git sha1 from directory rootdir.
Args:
- rootdir: Root directory from which to begin the git hash computation
......@@ -355,7 +400,8 @@ def walk_and_compute_sha1_from_directory(rootdir,
dir_hashes = []
for fulldirname in (dir for dir in dirnames if dir not in all_links):
tree_hash = compute_tree_metadata(fulldirname, ls_hashes)
tree_hash = _compute_tree_metadata(fulldirname,
ls_hashes[fulldirname])
dir_hashes.append(tree_hash)
ls_hashes[dirpath].extend(dir_hashes)
......@@ -363,7 +409,7 @@ def walk_and_compute_sha1_from_directory(rootdir,
if with_root_tree:
# compute the current directory hashes
root_hash = {
'sha1_git': compute_directory_git_sha1(rootdir, ls_hashes),
'sha1_git': _compute_directory_git_sha1(ls_hashes[rootdir]),
'path': rootdir,
'name': os.path.basename(rootdir),
'perms': GitPerm.TREE,
......@@ -374,8 +420,120 @@ def walk_and_compute_sha1_from_directory(rootdir,
return ls_hashes
def walk_and_compute_sha1_from_directory_2(rootdir,
                                           dir_ok_fn=default_validation_dir,
                                           remove_empty_folder=False):
    """Compute git sha1 from directory rootdir.

    Args:
        - rootdir: Root directory from which to begin the git hash
        computation. It is compared against b'/', so it is expected
        to be a bytes path; any trailing b'/' is stripped.
        - dir_ok_fn: Filter function to filter directory according to rules
        defined in the function. By default, all folders are ok.
        Example override: dir_ok_fn = lambda dirpath: b'svn' not in dirpath
        - remove_empty_folder: forwarded as-is to the internal walk
        (presumably skips empty folders -- TODO confirm against __walk).

    Returns:
        Dictionary of entries with keys absolute path name.
        Path-name can be a file/link or directory.
        The associated value is a dictionary with:
        - checksums: the dictionary with the hashes for the link/file/dir
        Those are list of dictionary with keys:
          - 'perms'
          - 'type'
          - 'name'
          - 'sha1_git'
          - and specifically content: 'sha1', 'sha256', ...

        - children: Only for a directory, the set of children paths

    Note:
        The entry stored under rootdir (after trailing-slash stripping)
        holds the checksums of the root tree itself (this is the
        revision's directory).

    Raises:
        Nothing
        If something is raised, this is a programmatic error.

    """
    def __get_dict_from_dirpath(_dict, path):
        """Return the entry stored under path, or a fresh directory entry
        (empty children set, no checksums) when path is not yet known.

        The fresh entry is NOT inserted into _dict; callers store it
        themselves.

        """
        return _dict.get(path, dict(children=set(), checksums=None))

    def __get_dict_from_filepath(_dict, path):
        """Return the entry stored under path, or a fresh file/link entry
        (no checksums) when path is not yet known.

        The fresh entry is NOT inserted into _dict; callers store it
        themselves.

        """
        return _dict.get(path, dict(checksums=None))

    # path -> {'checksums': ..., 'children': set(...) (directories only)}
    ls_hashes = {}
    # every symlink path met so far, so links are not hashed again as
    # regular files or directories below
    all_links = set()

    if rootdir.endswith(b'/'):
        rootdir = rootdir.rstrip(b'/')

    # NOTE(review): the directory loop below reads the children of each
    # sub-directory from ls_hashes, so this presumably relies on __walk
    # yielding sub-directories before their parents -- TODO confirm.
    for dirpath, dirnames, filenames in __walk(
            rootdir, dir_ok_fn, remove_empty_folder):

        dir_entry = __get_dict_from_dirpath(ls_hashes, dirpath)
        children = dir_entry['children']

        # filenames supports .union(), so it is a set-like collection;
        # entries are passed directly to os.path.islink and used as
        # ls_hashes keys (presumably full paths -- TODO confirm)
        links = (file
                 for file in filenames.union(dirnames)
                 if os.path.islink(file))

        # symlinks (whether they point to files or directories)
        for linkpath in links:
            all_links.add(linkpath)
            m_hashes = compute_link_metadata(linkpath)
            d = __get_dict_from_filepath(ls_hashes, linkpath)
            d['checksums'] = m_hashes
            ls_hashes[linkpath] = d
            children.add(linkpath)

        # regular files (links already handled above are excluded)
        for filepath in (file for file in filenames if file not in all_links):
            m_hashes = compute_blob_metadata(filepath)
            d = __get_dict_from_filepath(ls_hashes, filepath)
            d['checksums'] = m_hashes
            ls_hashes[filepath] = d
            children.add(filepath)

        # sub-directories (links already handled above are excluded)
        for fulldirname in (dir for dir in dirnames if dir not in all_links):
            d_hashes = __get_dict_from_dirpath(ls_hashes, fulldirname)
            # the tree hash is derived from the checksums of the
            # sub-directory's recorded children; an unknown sub-directory
            # falls back to an empty children set
            tree_hash = _compute_tree_metadata(
                fulldirname,
                (ls_hashes[p]['checksums'] for p in d_hashes['children'])
            )
            d = __get_dict_from_dirpath(ls_hashes, fulldirname)
            d['checksums'] = tree_hash
            ls_hashes[fulldirname] = d
            children.add(fulldirname)

        # record the directory itself; its own checksums are filled in
        # when its parent is processed (or below, for rootdir)
        dir_entry['children'] = children
        ls_hashes[dirpath] = dir_entry

    # compute the current directory hashes
    d_hashes = __get_dict_from_dirpath(ls_hashes, rootdir)
    root_hash = {
        'sha1_git': _compute_directory_git_sha1(
            (ls_hashes[p]['checksums'] for p in d_hashes['children'])
        ),
        'path': rootdir,
        'name': os.path.basename(rootdir),
        'perms': GitPerm.TREE,
        'type': GitType.TREE
    }
    d_hashes['checksums'] = root_hash
    ls_hashes[rootdir] = d_hashes

    return ls_hashes
def recompute_sha1_in_memory(root, deeper_rootdir, objects):
"""Recompute git sha1 from directory deeper_rootdir to root.
"""TODO: Use git.walk_and_compute_sha1_from_directory_2
Recompute git sha1 from directory deeper_rootdir to root.
This function relies exclusively on `objects` for hashes. It
expects the deeper_rootdir and every key below that path to be
......@@ -601,3 +759,36 @@ def update_checksums_from(changed_paths, objects,
# Recompute hashes in memory from rootdir to root
return recompute_sha1_in_memory(root, rootdir, objects)
def objects_per_type(filter_type, objects_per_path):
    """Given an object dictionary returned by
    `swh.model.git.walk_and_compute_sha1_from_directory_2`, yields
    corresponding element type's hashes

    Args:
        filter_type: one of GitType enum; compared with == against the
        'type' value of each entry's checksums.
        objects_per_path: dictionary mapping a path to an entry
        dictionary holding:
            - checksums: the hashes dictionary of the path (must contain
              a 'type' key)
            - children: (directories only) the collection of children
              paths

    Yields:
        Elements of type filter_type's hashes, i.e. the 'checksums'
        dictionary of every matching entry.

        For entries carrying a 'children' key, the yielded dictionary is
        updated IN PLACE with a 'children' key: an empty list when the
        entry has no children, otherwise a lazy, single-use generator
        over the children's checksums. Children paths that are unknown
        or have no checksums are skipped.

    """
    def _children_hashes(objects, children):
        # Lazily resolve each child path to its checksums, ignoring
        # unknown paths and entries without (or with empty) checksums.
        for child_path in children:
            child = objects.get(child_path)
            if not child:
                continue
            child_checksums = child.get('checksums')
            if child_checksums:
                yield child_checksums

    # Only the entries are needed, not the paths they are stored under.
    for obj in objects_per_path.values():
        checksums = obj['checksums']
        if checksums['type'] == filter_type:
            if 'children' in obj:  # for trees
                if obj['children']:
                    checksums['children'] = _children_hashes(
                        objects_per_path, obj['children'])
                else:
                    checksums['children'] = []
            yield checksums
v0.0.9-0-g9b9ec94
\ No newline at end of file
v0.0.10-0-g87fcced
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment