Skip to content
Snippets Groups Projects
Commit 0d5bc177 authored by Stefano Zacchiroli
Browse files

add swh-identify CLI tool to compute persistent identifiers

Currently only content and directory object types are supported, but more can
be added in the future.

Closes T1039
parent 0c00db97
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/python3
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import sys
from swh.model.from_disk import Content
from swh.model.hashutil import hash_to_hex
# Name of a hash algorithm.
# NOTE(review): nothing in the code visible here reads this constant --
# confirm whether it is still needed.
HASH_ALGO = 'sha1_git'
def hash_file(fname):
    """Return the hexadecimal hash of the file named ``fname``.

    The name is converted to bytes with ``os.fsencode`` before being handed
    to ``Content.from_file``. File names that are not valid UTF-8 reach
    Python as surrogate-escaped strings (e.g. through ``sys.argv``); a plain
    ``str.encode()`` raises ``UnicodeEncodeError`` on those, whereas
    ``os.fsencode`` restores the original bytes.

    Args:
        fname: path of the file to hash (``str``, ``bytes`` or path-like).

    Returns:
        ``hash_to_hex`` applied to the ``hash`` attribute of the loaded
        content object.
    """
    # Local import: the import block of this script does not provide ``os``.
    import os

    return hash_to_hex(Content.from_file(path=os.fsencode(fname)).hash)
def main(fnames):
    """Print one ``name<TAB>hash`` line per entry of ``fnames``.

    Args:
        fnames: iterable of file names; each one is passed to ``hash_file``.
    """
    for file_name in fnames:
        digest = hash_file(file_name)
        print(file_name, digest, sep='\t')
if __name__ == '__main__':
    # Every command-line argument after the program name is a file to hash.
    fnames = sys.argv[1:]
    if not fnames:
        # Report the usage error on stderr so that stdout only ever carries
        # the tab-separated results; status 2 signals incorrect invocation.
        print('Usage: swh-hash-file FILE...', file=sys.stderr)
        sys.exit(2)
    main(fnames)
......@@ -31,6 +31,10 @@ setup(
packages=find_packages(), # packages's modules
scripts=[], # scripts to package
install_requires=parse_requirements() + extra_requirements,
entry_points='''
[console_scripts]
swh-identify=swh.model.cli:identify
''',
setup_requires=['vcversioner'],
vcversioner={},
include_package_data=True,
......
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import click
import os
import sys
from swh.model import identifiers as pids
from swh.model.from_disk import Content, Directory
class PidParamType(click.ParamType):
    """Click parameter type accepting only parseable persistent identifiers.

    The value is validated with ``pids.parse_persistent_identifier`` but is
    handed back unchanged, as a string.
    """

    name = 'persistent identifier'

    def convert(self, value, param, ctx):
        """Validate ``value`` and return it unchanged.

        Any exception raised while parsing is reported through
        ``self.fail`` with the offending value in the message.
        """
        try:
            # Only the side effect (raising on malformed input) matters;
            # the parsed result is not needed.
            pids.parse_persistent_identifier(value)
        except Exception:
            # TODO catch more specific parsing exception. Requires
            # https://forge.softwareheritage.org/T1104 to be addressed first.
            self.fail('%s is not a valid PID' % value, param, ctx)
        else:
            return value  # the caller needs just the string form
def pid_of_file(path):
    """Return the persistent identifier of the content found at ``path``.

    The file is loaded with ``Content.from_file`` and its ``get_data()``
    result is identified as a ``pids.CONTENT`` object.
    """
    content_data = Content.from_file(path=path).get_data()
    return pids.persistent_identifier(pids.CONTENT, content_data)
def pid_of_dir(path):
    """Return the persistent identifier of the directory found at ``path``.

    The directory is loaded with ``Directory.from_disk`` and its
    ``get_data()`` result is identified as a ``pids.DIRECTORY`` object.
    """
    directory_data = Directory.from_disk(path=path).get_data()
    return pids.persistent_identifier(pids.DIRECTORY, directory_data)
# Command-line entry point computing (and optionally verifying) a PID.
#
#   type:   'auto', 'content' or 'directory'. With 'auto' the kind is chosen
#           by testing the path with os.path.isfile / os.path.isdir.
#   verify: optional PID string; when given, the computed PID is compared
#           with it and the process exits with 0 (match) or 1 (mismatch).
#   object: path to identify, requested from click as bytes
#           (path_type=bytes).
#
# The function docstring is what click shows for --help, so developer-facing
# notes are kept in comments rather than in the docstring.
@click.command()
@click.option('--type', '-t', default='auto',
              type=click.Choice(['auto', 'content', 'directory']),
              help='type of object to identify (default: auto)')
@click.option('--verify', '-v', metavar='PID', type=PidParamType(),
              help='reference identifier to be compared with computed one')
@click.argument('object',
                type=click.Path(exists=True, readable=True,
                                allow_dash=True, path_type=bytes))
def identify(type, verify, object):
    """Compute the Software Heritage persistent identifier (PID) for a given
    source code object.

    For more details about Software Heritage PIDs see:

    \b
    https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html

    \b
    Examples:

    \b
    $ swh-identify /usr/src/linux/kernel/
    swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab

    \b
    $ swh-identify /usr/src/linux/kernel/sched/deadline.c
    swh:1:cnt:57b939c81bce5d06fa587df8915f05affbe22b82

    """
    if type == 'auto':
        if os.path.isfile(object):
            type = 'content'
        elif os.path.isdir(object):
            type = 'directory'
        else:
            # NOTE(review): allow_dash=True appears to let a lone '-' through
            # click's existence check, and nothing here reads stdin, so this
            # branch looks reachable -- confirm against the click docs.
            # The path arrives as bytes; decode it so the message does not
            # render as "b'...'".
            raise click.BadParameter('%s is neither a file nor a directory' %
                                     os.fsdecode(object))

    if type == 'content':
        pid = pid_of_file(object)
    elif type == 'directory':
        pid = pid_of_dir(object)
    else:  # shouldn't happen, due to option validation
        raise click.BadParameter('invalid object type: ' + type)

    if not verify:
        click.echo(pid)
        return

    # Verification mode: the exit status carries the outcome.
    if verify == pid:
        click.echo('PID match: %s' % pid)
        sys.exit(0)
    click.echo('PID mismatch: %s != %s' % (verify, pid))
    sys.exit(1)
# Run the click command when this module is executed directly as a script.
if __name__ == '__main__':
    identify()
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import tempfile
import unittest
from click.testing import CliRunner
from swh.model import cli
from swh.model.tests.test_from_disk import DataMixin
from swh.model.hashutil import hash_to_hex
class TestIdentify(DataMixin, unittest.TestCase):
    """Exercise the ``identify`` command through click's ``CliRunner``.

    Fixtures (``tmpdir_name``, ``contents``, ``make_contents``,
    ``make_from_tarball``) are presumably provided by ``DataMixin`` --
    verify against that class.
    """

    def setUp(self):
        super().setUp()
        self.runner = CliRunner()

    def _run_identify(self, *cli_args):
        """Invoke ``cli.identify`` with ``cli_args``; return click's result."""
        return self.runner.invoke(cli.identify, list(cli_args))

    def test_content_id(self):
        """Each sample content yields its ``swh:1:cnt:`` identifier."""
        self.make_contents(self.tmpdir_name)
        for filename, content in self.contents.items():
            target = os.path.join(self.tmpdir_name, filename)
            expected_pid = 'swh:1:cnt:' + hash_to_hex(content['sha1_git'])

            outcome = self._run_identify('--type', 'content', target)

            self.assertEqual(outcome.exit_code, 0)
            self.assertEqual(outcome.output.rstrip(), expected_pid)

    def test_directory_id(self):
        """The sample folder yields its known ``swh:1:dir:`` identifier."""
        self.make_from_tarball(self.tmpdir_name)
        target = os.path.join(self.tmpdir_name, b'sample-folder')

        outcome = self._run_identify('--type', 'directory', target)

        self.assertEqual(outcome.exit_code, 0)
        self.assertEqual(outcome.output.rstrip(),
                         'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759')

    def test_auto_id(self):
        """Without ``--type`` the object kind is detected from the path."""
        with tempfile.NamedTemporaryFile(prefix='swh.model.cli') as tmp_file:
            outcome = self._run_identify(tmp_file.name)
            self.assertEqual(outcome.exit_code, 0)
            self.assertRegex(outcome.output, r'^swh:\d+:cnt:')

        with tempfile.TemporaryDirectory(prefix='swh.model.cli') as tmp_dir:
            outcome = self._run_identify(tmp_dir)
            self.assertEqual(outcome.exit_code, 0)
            self.assertRegex(outcome.output, r'^swh:\d+:dir:')

    def test_verify_content(self):
        """``--verify`` exits 0 on a match and 1 once the file changes."""
        self.make_contents(self.tmpdir_name)
        for filename, content in self.contents.items():
            expected_id = 'swh:1:cnt:' + hash_to_hex(content['sha1_git'])
            target = os.path.join(self.tmpdir_name, filename)

            # Untouched file: the reference identifier must match.
            outcome = self._run_identify('--verify', expected_id, target)
            self.assertEqual(outcome.exit_code, 0)

            # Altered file: the same reference identifier must now mismatch.
            with open(target, 'a') as handle:
                handle.write('trailing garbage to make verification fail')
            outcome = self._run_identify('--verify', expected_id, target)
            self.assertEqual(outcome.exit_code, 1)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment