Skip to content
Snippets Groups Projects
Commit 007a710a authored by Antoine R. Dumont
Browse files

Update upstream source from tag 'debian/upstream/0.0.4'

Update to upstream version '0.0.4'
with Debian dir d3073bbc1ce69a465ec68ede55fbbcca67553d2e
parents 66de1f8e 5a7a7d45
No related branches found
No related tags found
No related merge requests found
FLAG=-v
NOSEFLAGS=-v -s
Metadata-Version: 1.0
Name: swh.model
Version: 0.0.3
Version: 0.0.4
Summary: Software Heritage data model
Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
Author: Software Heritage developers
......
Metadata-Version: 1.0
Name: swh.model
Version: 0.0.3
Version: 0.0.4
Summary: Software Heritage data model
Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
Author: Software Heritage developers
......
......@@ -3,6 +3,7 @@ AUTHORS
LICENSE
MANIFEST.in
Makefile
Makefile.local
requirements.txt
setup.py
version.txt
......@@ -19,6 +20,7 @@ swh.model.egg-info/requires.txt
swh.model.egg-info/top_level.txt
swh/model/__init__.py
swh/model/exceptions.py
swh/model/git.py
swh/model/hashutil.py
swh/model/identifiers.py
swh/model/validators.py
......@@ -27,6 +29,8 @@ swh/model/fields/compound.py
swh/model/fields/hashes.py
swh/model/fields/simple.py
swh/model/tests/__init__.py
swh/model/tests/test_git.py
swh/model/tests/test_git_slow.py
swh/model/tests/test_hashutil.py
swh/model/tests/test_identifiers.py
swh/model/tests/test_validators.py
......
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from enum import Enum
from swh.model import hashutil, identifiers
# Special key under which walk_and_compute_sha1_from_directory stores the
# single-entry list describing the root directory itself.
ROOT_TREE_KEY = b''
class GitType(Enum):
    """Byte-string tags for the object types this module distinguishes.

    Within this module only BLOB and TREE are assigned to the 'type' key
    of computed metadata dictionaries.
    """

    # Tags used by the metadata helpers in this module.
    BLOB = b'blob'
    TREE = b'tree'

    # Remaining tags; not referenced by the functions of this module.
    EXEC = b'exec'
    LINK = b'link'
    COMM = b'commit'
    RELE = b'release'
    REFS = b'ref'
class GitPerm(Enum):
    """Permission strings attached to tree entries.

    Each value is a byte string of octal digits; compute_directory_git_sha1
    converts it to an integer with ``int(value, 8)``.
    """

    BLOB = b'100644'  # assigned to files that are not executable
    TREE = b'40000'   # assigned to directories
    EXEC = b'100755'  # assigned to executable files
    LINK = b'120000'  # assigned to symbolic links
def compute_directory_git_sha1(dirpath, hashes):
    """Compute the git sha1 of the directory registered under dirpath.

    Args:
        dirpath: key of the directory to hash inside `hashes`
        hashes: mapping from directory path to the list of its tree
            entries; each entry is a dictionary with keys:
            - sha1_git: the entry's sha1
            - name: the file or sub-directory name
            - perms: the entry's permissions, as a GitPerm member

    Returns:
        the directory identifier, converted with hashutil.hash_to_bytes

    Raises:
        KeyError: when dirpath is not a key of `hashes` (every path is
            assumed to be present).

    """
    entries = []
    for item in hashes[dirpath]:
        entries.append({
            'name': item['name'],
            # permissions are stored as octal digit strings
            'perms': int(item['perms'].value, 8),
            'target': item['sha1_git'],
            # anything that is not a tree is reported as a file
            'type': 'file' if item['perms'] != GitPerm.TREE else 'dir',
        })

    identifier = identifiers.directory_identifier({'entries': entries})
    return hashutil.hash_to_bytes(identifier)
def compute_revision_sha1_git(revision):
    """Compute the sha1 git of a revision given as a dictionary.

    Args:
        revision: dictionary describing the (possibly synthetic) revision,
            handed as-is to identifiers.revision_identifier. The following
            keys are expected:
            - author
            - date
            - committer
            - committer_date
            - message
            - type
            - directory: binary form of the tree hash

    Returns:
        the revision sha1, in bytes

    """
    # FIXME: beware, bytes output from storage api
    revision_id = identifiers.revision_identifier(revision)
    return hashutil.hash_to_bytes(revision_id)
def compute_release_sha1_git(release):
    """Compute the sha1 git of a release given as a dictionary.

    Args:
        release: dictionary describing the (possibly synthetic) release,
            handed as-is to identifiers.release_identifier. The following
            keys are expected:
            - name
            - message
            - date
            - author
            - revision: binary form of the sha1_git of the revision
              targeted by this release
              (NOTE(review): the exact key naming the target is decided by
              identifiers.release_identifier -- confirm against it)

    Returns:
        the release sha1, in bytes

    """
    release_id = identifiers.release_identifier(release)
    return hashutil.hash_to_bytes(release_id)
def compute_link_metadata(linkpath):
    """Build the metadata dictionary of a symbolic link.

    The hashes are computed on the link target (the value returned by
    os.readlink), not on the content the link points to.

    Args:
        linkpath: absolute pathname of the link

    Returns:
        The dictionary returned by hashutil.hash_data for the link target,
        completed with the keys:
        - data: the link target
        - length: length of the link target
        - name: basename of the link
        - perms: git permission for link (GitPerm.LINK)
        - type: git type for link (GitType.BLOB)
        - path: linkpath itself

    """
    target = os.readlink(linkpath)
    metadata = hashutil.hash_data(target)
    metadata.update(
        data=target,
        length=len(target),
        name=os.path.basename(linkpath),
        perms=GitPerm.LINK,
        type=GitType.BLOB,
        path=linkpath,
    )
    return metadata
def compute_blob_metadata(filepath):
    """Build the metadata dictionary of a regular file.

    Args:
        filepath: absolute pathname of the file.

    Returns:
        The dictionary returned by hashutil.hash_path for the file,
        completed with the keys:
        - name: basename of the file
        - perms: git permission for file (GitPerm.EXEC when the file is
          reported executable, GitPerm.BLOB otherwise)
        - type: git type for file (GitType.BLOB)
        - path: filepath itself

    """
    metadata = hashutil.hash_path(filepath)

    # NOTE(review): os.access answers for the user running this process,
    # not from the file's mode bits alone -- confirm this is intended.
    if os.access(filepath, os.X_OK):
        permissions = GitPerm.EXEC
    else:
        permissions = GitPerm.BLOB

    metadata.update(
        name=os.path.basename(filepath),
        perms=permissions,
        type=GitType.BLOB,
        path=filepath,
    )
    return metadata
def compute_tree_metadata(dirname, ls_hashes):
    """Build the metadata dictionary of a directory.

    Args:
        dirname: absolute pathname of the directory.
        ls_hashes: mapping from directory path to its list of entries;
            it must already hold the entries of dirname.

    Returns:
        Dictionary of values:
        - sha1_git: sha1 git of the directory
        - name: basename of the directory
        - perms: git permission for directory (GitPerm.TREE)
        - type: git type for directory (GitType.TREE)
        - path: dirname itself

    """
    tree_sha1 = compute_directory_git_sha1(dirname, ls_hashes)
    tree_name = os.path.basename(dirname)
    return dict(
        sha1_git=tree_sha1,
        name=tree_name,
        perms=GitPerm.TREE,
        type=GitType.TREE,
        path=dirname,
    )
def walk_and_compute_sha1_from_directory(rootdir,
                                         dir_ok_fn=lambda dirpath: True):
    """Walk rootdir bottom-up and compute the git sha1 of what it holds.

    Args:
        - rootdir: Root directory from which the git hash computation
          starts
        - dir_ok_fn: Predicate called with a directory path; directories
          for which it returns a false value are left out. By default,
          every directory is kept.
          Example override: dir_ok_fn = lambda dirpath: b'svn' not in dirpath

    Returns:
        Dictionary mapping each visited directory path to the list of its
        entries. Each entry is a dictionary with keys:
        - 'perms'
        - 'type'
        - 'name'
        - 'sha1_git'
        - 'path'
        - and, for contents and links, whatever hashutil provides
          ('sha1', 'sha256', ...)

    Note:
        The special key ROOT_TREE_KEY holds a one-element list describing
        rootdir itself (this is the revision's directory).

    Raises:
        Nothing is raised on purpose; an exception here denotes a
        programmatic error. In particular the final root computation looks
        up rootdir among the visited directories, so a dir_ok_fn rejecting
        rootdir itself ends in a KeyError.

    """
    ls_hashes = {}
    seen_links = set()

    # Bottom-up walk: sub-directories are hashed before their parent.
    for dirpath, raw_dirnames, filenames in os.walk(rootdir, topdown=False):
        if not dir_ok_fn(dirpath):
            continue

        # Only keep the sub-directories accepted by the filter.
        dirnames = [name for name in raw_dirnames
                    if dir_ok_fn(os.path.join(dirpath, name))]

        entries = []

        # Symbolic links first, whether they point to files or directories.
        for name in filenames + dirnames:
            candidate = os.path.join(dirpath, name)
            if not os.path.islink(candidate):
                continue
            seen_links.add(candidate)
            entries.append(compute_link_metadata(candidate))

        # Then the files which are not links.
        for name in filenames:
            filepath = os.path.join(dirpath, name)
            if filepath in seen_links:
                continue
            entries.append(compute_blob_metadata(filepath))

        ls_hashes[dirpath] = entries

        # Finally the sub-directories which are not links; their own
        # entries are already registered thanks to the bottom-up order.
        subtrees = []
        for name in dirnames:
            subdir = os.path.join(dirpath, name)
            if subdir in seen_links:
                continue
            subtrees.append(compute_tree_metadata(subdir, ls_hashes))

        ls_hashes[dirpath].extend(subtrees)

    # Describe the root directory itself under the special key.
    root_sha1 = compute_directory_git_sha1(rootdir, ls_hashes)
    ls_hashes[ROOT_TREE_KEY] = [{
        'sha1_git': root_sha1,
        'path': rootdir,
        'name': os.path.basename(rootdir),
        'perms': GitPerm.TREE,
        'type': GitType.TREE
    }]

    return ls_hashes
# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import tempfile
import unittest
import subprocess
from nose.tools import istest
from swh.model import git
class GitHashlib(unittest.TestCase):
    """Check the sha1 git computed for a directory, a revision and a release
    against known checksums."""

    def setUp(self):
        # sha1_git of the three entries of the sample tree
        barfoo_id = bytes.fromhex('c3020f6bf135a38c6df3afeb5fb38232c5e07087')
        blah_id = bytes.fromhex('63756ef0df5e4f10b6efa33cfe5c758749615f20')
        hello_id = bytes.fromhex('907b308167f0880fb2a5c0e1614bb0c7620f9dc3')

        self.tree_data = b''.join([
            b'40000 barfoo\0', barfoo_id,
            b'100644 blah\0', blah_id,
            b'100644 hello\0', hello_id,
        ])

        self.commit_data = """tree 1c61f7259dcb770f46b194d941df4f08ff0a3970
author Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444054085 +0200
committer Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444054085 +0200
initial
""".encode('utf-8')  # NOQA

        self.tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241
type commit
tag 0.0.1
tagger Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444225145 +0200
blah
""".encode('utf-8')  # NOQA

        # expected results, one per kind of object
        self.checksums = dict(
            tree_sha1_git=bytes.fromhex(
                'ac212302c45eada382b27bfda795db121dacdb1c'),
            commit_sha1_git=bytes.fromhex(
                'e960570b2e6e2798fa4cfb9af2c399d629189653'),
            tag_sha1_git=bytes.fromhex(
                'bc2b99ba469987bcf1272c189ed534e9e959f120'),
        )

    @istest
    def compute_directory_git_sha1(self):
        # given: one sub-directory and two regular files
        dirpath = 'some-dir-path'
        barfoo_entry = {
            'perms': git.GitPerm.TREE,
            'type': git.GitType.TREE,
            'name': b'barfoo',
            'sha1_git': bytes.fromhex(
                'c3020f6bf135a38c6df3afeb5fb38232c5e07087'),
        }
        hello_entry = {
            'perms': git.GitPerm.BLOB,
            'type': git.GitType.BLOB,
            'name': b'hello',
            'sha1_git': bytes.fromhex(
                '907b308167f0880fb2a5c0e1614bb0c7620f9dc3'),
        }
        blah_entry = {
            'perms': git.GitPerm.BLOB,
            'type': git.GitType.BLOB,
            'name': b'blah',
            'sha1_git': bytes.fromhex(
                '63756ef0df5e4f10b6efa33cfe5c758749615f20'),
        }
        hashes = {dirpath: [barfoo_entry, hello_entry, blah_entry]}

        # when
        actual = git.compute_directory_git_sha1(dirpath, hashes)

        # then
        self.assertEqual(actual, self.checksums['tree_sha1_git'])

    @istest
    def compute_revision_sha1_git(self):
        # given
        directory_id = bytes.fromhex(
            '1c61f7259dcb770f46b194d941df4f08ff0a3970')
        revision = dict(
            author=dict(name=b'Antoine R. Dumont (@ardumont)',
                        email=b'antoine.romain.dumont@gmail.com'),
            date=dict(timestamp=1444054085, offset=120),
            committer=dict(name=b'Antoine R. Dumont (@ardumont)',
                           email=b'antoine.romain.dumont@gmail.com'),
            committer_date=dict(timestamp=1444054085, offset=120),
            message=b'initial\n',
            type='tar',
            directory=directory_id,
            parents=[],
        )

        # when
        actual = git.compute_revision_sha1_git(revision)

        # then
        self.assertEqual(actual, self.checksums['commit_sha1_git'])

    @istest
    def compute_release_sha1_git(self):
        # given
        target_id = bytes.fromhex('24d012aaec0bc5a4d2f62c56399053d6cc72a241')
        release = dict(
            name=b'0.0.1',
            author=dict(name=b'Antoine R. Dumont (@ardumont)',
                        email=b'antoine.romain.dumont@gmail.com'),
            date=dict(timestamp=1444225145, offset=120),
            message=b'blah\n',
            target_type='revision',
            target=target_id,
        )

        # when
        actual = git.compute_release_sha1_git(release)

        # then
        self.assertEqual(actual, self.checksums['tag_sha1_git'])
class GitHashArborescenceTree(unittest.TestCase):
    """Check walk_and_compute_sha1_from_directory on an extracted sample
    folder, with and without a directory filter."""

    @classmethod
    def setUpClass(cls):
        """Extract the sample-folder archive into a fresh temporary
        directory shared by the tests of this class."""
        super().setUpClass()

        cls.tmp_root_path = tempfile.mkdtemp().encode('utf-8')

        start_path = os.path.dirname(__file__).encode('utf-8')
        sample_folder_archive = os.path.join(start_path,
                                             b'../../../..',
                                             b'swh-storage-testdata',
                                             b'dir-folders',
                                             b'sample-folder.tgz')

        cls.root_path = os.path.join(cls.tmp_root_path, b'sample-folder')

        # uncompress the sample folder
        subprocess.check_output(
            ['tar', 'xvf', sample_folder_archive, '-C', cls.tmp_root_path])

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory created by setUpClass so test
        runs do not leave extracted archives behind."""
        import shutil

        shutil.rmtree(cls.tmp_root_path, ignore_errors=True)
        super().tearDownClass()

    @istest
    def walk_and_compute_sha1_from_directory(self):
        # make a temporary arborescence tree to hash without ignoring anything
        # same as previous behavior
        walk0 = git.walk_and_compute_sha1_from_directory(self.tmp_root_path)

        keys0 = list(walk0.keys())
        path_excluded = os.path.join(self.tmp_root_path,
                                     b'sample-folder',
                                     b'foo')
        self.assertIn(path_excluded, keys0)  # it is not excluded here

        # make the same temporary arborescence tree to hash with ignoring one
        # folder foo
        walk1 = git.walk_and_compute_sha1_from_directory(
            self.tmp_root_path,
            dir_ok_fn=lambda dirpath: b'sample-folder/foo' not in dirpath)
        keys1 = list(walk1.keys())
        self.assertNotIn(path_excluded, keys1)

        # remove the keys that can't be the same (due to hash definition)
        # Those are the top level folders
        keys_diff = [self.tmp_root_path,
                     os.path.join(self.tmp_root_path, b'sample-folder'),
                     git.ROOT_TREE_KEY]

        for k in keys_diff:
            self.assertNotEqual(walk0[k], walk1[k])

        # The remaining keys (bottom path) should have exactly the same hashes
        # as before
        keys = set(keys1) - set(keys_diff)
        actual_walk1 = {}
        for k in keys:
            self.assertEqual(walk0[k], walk1[k])
            actual_walk1[k] = walk1[k]

        expected_checksums = {
            os.path.join(self.tmp_root_path, b'sample-folder/empty-folder'): [],                                  # noqa
            os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo'): [{                                     # noqa
                'type': git.GitType.BLOB,                                                                         # noqa
                'sha256': b'=\xb5\xae\x16\x80U\xbc\xd9:M\x08(]\xc9\x9f\xfe\xe2\x883\x03\xb2?\xac^\xab\x85\x02s\xa8\xeaUF',  # noqa
                'name': b'another-quote.org',                                                                     # noqa
                'path': os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo/another-quote.org'),          # noqa
                'perms': git.GitPerm.BLOB,                                                                        # noqa
                'sha1': b'\x90\xa6\x13\x8b\xa5\x99\x15&\x1e\x17\x99H8j\xa1\xcc*\xa9"\n',                           # noqa
                'sha1_git': b'\x136\x93\xb1%\xba\xd2\xb4\xac1\x855\xb8I\x01\xeb\xb1\xf6\xb68'}],                  # noqa
            os.path.join(self.tmp_root_path, b'sample-folder/bar'): [{                                            # noqa
                'type': git.GitType.TREE,                                                                         # noqa
                'perms': git.GitPerm.TREE,                                                                        # noqa
                'name': b'barfoo',                                                                                # noqa
                'path': os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo'),                            # noqa
                'sha1_git': b'\xc3\x02\x0fk\xf15\xa3\x8cm\xf3\xaf\xeb_\xb3\x822\xc5\xe0p\x87'}]}                  # noqa

        self.assertEqual(actual_walk1, expected_checksums)
This diff is collapsed.
v0.0.3-0-g100d537
\ No newline at end of file
v0.0.4-0-g696d23e
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment