Skip to content
Snippets Groups Projects
Commit fd2e6dae authored by Stefano Zacchiroli
Browse files

swh identify: add support for origin PIDs

parent 880aff9d
No related branches found
No related tags found
No related merge requests found
# Copyright (C) 2018 The Software Heritage developers # Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution # See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version # License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information # See top-level LICENSE file for more information
...@@ -8,6 +8,7 @@ import os ...@@ -8,6 +8,7 @@ import os
import sys import sys
from functools import partial from functools import partial
from urllib.parse import urlparse
from swh.model import identifiers as pids from swh.model import identifiers as pids
from swh.model.exceptions import ValidationError from swh.model.exceptions import ValidationError
...@@ -38,25 +39,40 @@ def pid_of_dir(path): ...@@ -38,25 +39,40 @@ def pid_of_dir(path):
return pids.persistent_identifier(pids.DIRECTORY, object) return pids.persistent_identifier(pids.DIRECTORY, object)
def pid_of_origin(url):
    """Return, as a string, the persistent identifier of the origin at url.

    The identifier is built from the intrinsic origin identifier computed
    over a minimal origin description holding only the URL.
    """
    origin_id = pids.origin_identifier({'url': url})
    return str(pids.PersistentId(object_type='origin', object_id=origin_id))
def identify_object(obj_type, follow_symlinks, obj): def identify_object(obj_type, follow_symlinks, obj):
if obj_type == 'auto': if obj_type == 'auto':
if os.path.isfile(obj): if os.path.isfile(obj):
obj_type = 'content' obj_type = 'content'
elif os.path.isdir(obj): elif os.path.isdir(obj):
obj_type = 'directory' obj_type = 'directory'
else: # shouldn't happen, due to path validation else:
raise click.BadParameter('%s is neither a file nor a directory' % try: # URL parsing
obj) if urlparse(obj).scheme:
obj_type = 'origin'
path = obj else:
if follow_symlinks and os.path.islink(obj): raise ValueError
path = os.path.realpath(obj) except ValueError:
raise click.BadParameter('cannot detect object type for %s' %
obj)
pid = None pid = None
if obj_type == 'content':
pid = pid_of_file(path) if obj_type in ['content', 'directory']:
elif obj_type == 'directory': path = obj.encode(sys.getfilesystemencoding())
pid = pid_of_dir(path) if follow_symlinks and os.path.islink(obj):
path = os.path.realpath(obj)
if obj_type == 'content':
pid = pid_of_file(path)
elif obj_type == 'directory':
pid = pid_of_dir(path)
elif obj_type == 'origin':
pid = pid_of_origin(obj)
else: # shouldn't happen, due to option validation else: # shouldn't happen, due to option validation
raise click.BadParameter('invalid object type: ' + obj_type) raise click.BadParameter('invalid object type: ' + obj_type)
...@@ -73,13 +89,11 @@ def identify_object(obj_type, follow_symlinks, obj): ...@@ -73,13 +89,11 @@ def identify_object(obj_type, follow_symlinks, obj):
@click.option('--filename/--no-filename', 'show_filename', default=True, @click.option('--filename/--no-filename', 'show_filename', default=True,
help='show/hide file name (default: show)') help='show/hide file name (default: show)')
@click.option('--type', '-t', 'obj_type', default='auto', @click.option('--type', '-t', 'obj_type', default='auto',
type=click.Choice(['auto', 'content', 'directory']), type=click.Choice(['auto', 'content', 'directory', 'origin']),
help='type of object to identify (default: auto)') help='type of object to identify (default: auto)')
@click.option('--verify', '-v', metavar='PID', type=PidParamType(), @click.option('--verify', '-v', metavar='PID', type=PidParamType(),
help='reference identifier to be compared with computed one') help='reference identifier to be compared with computed one')
@click.argument('objects', nargs=-1, required=True, @click.argument('objects', nargs=-1, required=True)
type=click.Path(exists=True, readable=True,
allow_dash=True, path_type=bytes))
def identify(obj_type, verify, show_filename, follow_symlinks, objects): def identify(obj_type, verify, show_filename, follow_symlinks, objects):
"""Compute the Software Heritage persistent identifier (PID) for the given """Compute the Software Heritage persistent identifier (PID) for the given
source code object(s). source code object(s).
......
...@@ -45,6 +45,13 @@ class TestIdentify(DataMixin, unittest.TestCase): ...@@ -45,6 +45,13 @@ class TestIdentify(DataMixin, unittest.TestCase):
self.assertPidOK(result, self.assertPidOK(result,
'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759') 'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759')
def test_origin_id(self):
    """Check the PID printed for a URL passed with an explicit origin type."""
    expected_pid = 'swh:1:ori:b63a575fe3faab7692c9f38fb09d4bb45651bb0f'
    cli_args = ['--type', 'origin', 'https://github.com/torvalds/linux']
    outcome = self.runner.invoke(cli.identify, cli_args)
    self.assertPidOK(outcome, expected_pid)
def test_symlink(self): def test_symlink(self):
"""identify symlink --- both itself and target""" """identify symlink --- both itself and target"""
regular = os.path.join(self.tmpdir_name, b'foo.txt') regular = os.path.join(self.tmpdir_name, b'foo.txt')
...@@ -84,18 +91,27 @@ class TestIdentify(DataMixin, unittest.TestCase): ...@@ -84,18 +91,27 @@ class TestIdentify(DataMixin, unittest.TestCase):
self.assertPidOK(result, self.assertPidOK(result,
'swh:1:cnt:' + hash_to_hex(content['sha1_git'])) 'swh:1:cnt:' + hash_to_hex(content['sha1_git']))
def test_auto_id(self): def test_auto_content(self):
"""automatic object type: file or directory, depending on argument""" """automatic object type detection: content"""
with tempfile.NamedTemporaryFile(prefix='swh.model.cli') as f: with tempfile.NamedTemporaryFile(prefix='swh.model.cli') as f:
result = self.runner.invoke(cli.identify, [f.name]) result = self.runner.invoke(cli.identify, [f.name])
self.assertEqual(result.exit_code, 0) self.assertEqual(result.exit_code, 0)
self.assertRegex(result.output, r'^swh:\d+:cnt:') self.assertRegex(result.output, r'^swh:\d+:cnt:')
def test_auto_directory(self):
"""automatic object type detection: directory"""
with tempfile.TemporaryDirectory(prefix='swh.model.cli') as dirname: with tempfile.TemporaryDirectory(prefix='swh.model.cli') as dirname:
result = self.runner.invoke(cli.identify, [dirname]) result = self.runner.invoke(cli.identify, [dirname])
self.assertEqual(result.exit_code, 0) self.assertEqual(result.exit_code, 0)
self.assertRegex(result.output, r'^swh:\d+:dir:') self.assertRegex(result.output, r'^swh:\d+:dir:')
def test_auto_origin(self):
    """Check that a bare URL argument yields an origin PID without --type."""
    origin_url = 'https://github.com/torvalds/linux'
    outcome = self.runner.invoke(cli.identify, [origin_url])
    # Success exit code first, then the object-type tag of the printed PID.
    self.assertEqual(outcome.exit_code, 0)
    self.assertRegex(outcome.output, r'^swh:\d+:ori:')
def test_verify_content(self): def test_verify_content(self):
"""identifier verification""" """identifier verification"""
self.make_contents(self.tmpdir_name) self.make_contents(self.tmpdir_name)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment