diff --git a/PKG-INFO b/PKG-INFO index 6bc733ad5b95c2e35c582409a392cab46405d246..db060458509c8318c24862a8adc8eacd7e83299e 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,14 +1,14 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.42 +Version: 0.0.43 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-model +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Description: swh-model ========= diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 6bc733ad5b95c2e35c582409a392cab46405d246..db060458509c8318c24862a8adc8eacd7e83299e 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,14 +1,14 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.42 +Version: 0.0.43 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN -Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-model +Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Description: swh-model ========= diff --git a/swh/model/cli.py b/swh/model/cli.py index 8355629596db9ef1a4d2c8bc22e4ed4ee0e59267..853efa995876adb1c8eef54503e52fb5b5a7b988 100644 --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -8,6 +8,7 @@ import os import sys from functools import partial +from urllib.parse import urlparse from swh.model import identifiers as pids from swh.model.exceptions import ValidationError @@ -38,25 +39,40 @@ def pid_of_dir(path): return pids.persistent_identifier(pids.DIRECTORY, object) +def pid_of_origin(url): + pid = pids.PersistentId(object_type='origin', + object_id=pids.origin_identifier({'url': url})) + return str(pid) + + def identify_object(obj_type, follow_symlinks, obj): if obj_type == 'auto': if os.path.isfile(obj): obj_type = 'content' elif os.path.isdir(obj): obj_type = 'directory' - else: # shouldn't happen, due to path validation - raise click.BadParameter('%s is neither a file nor a directory' % - obj) - - path = obj - if follow_symlinks and os.path.islink(obj): - path = os.path.realpath(obj) + else: + try: # URL parsing + if urlparse(obj).scheme: + obj_type = 'origin' + else: + raise ValueError + except ValueError: + raise click.BadParameter('cannot detect object type for %s' % + obj) pid = None - if obj_type == 'content': - pid = pid_of_file(path) - elif obj_type == 'directory': - pid = pid_of_dir(path) + + if obj_type in ['content', 'directory']: + path = obj.encode(sys.getfilesystemencoding()) + if follow_symlinks and os.path.islink(obj): + path = os.path.realpath(obj) + if obj_type == 'content': + pid = pid_of_file(path) + elif obj_type == 'directory': + pid = pid_of_dir(path) + elif obj_type == 'origin': + pid = pid_of_origin(obj) else: # shouldn't happen, due to option validation raise click.BadParameter('invalid object type: ' + obj_type) @@ -73,13 +89,11 @@ def identify_object(obj_type, follow_symlinks, obj): @click.option('--filename/--no-filename', 'show_filename', default=True, help='show/hide file name (default: show)') @click.option('--type', '-t', 'obj_type', default='auto', - type=click.Choice(['auto', 'content', 'directory']), + type=click.Choice(['auto', 'content', 'directory', 'origin']), help='type of object to identify (default: auto)') @click.option('--verify', '-v', metavar='PID', type=PidParamType(), help='reference identifier to be compared with computed one') -@click.argument('objects', nargs=-1, required=True, - type=click.Path(exists=True, readable=True, - allow_dash=True, path_type=bytes)) +@click.argument('objects', nargs=-1, required=True) def identify(obj_type, verify, show_filename, follow_symlinks, objects): """Compute the Software Heritage persistent identifier (PID) for the given source code object(s). diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index d8f4b28710d33538ed273172b1a7b52e83f37d6a..62e031b09df10093b7465a8ce32ea6e7bd12c7bf 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -22,6 +22,14 @@ RELEASE = 'release' DIRECTORY = 'directory' CONTENT = 'content' +PID_NAMESPACE = 'swh' +PID_VERSION = 1 +PID_TYPES = ['ori', 'snp', 'rel', 'rev', 'dir', 'cnt'] +PID_KEYS = ['namespace', 'scheme_version', 'object_type', 'object_id', + 'metadata'] +PID_SEP = ':' +PID_CTXT_SEP = ';' + @lru_cache() def identifier_to_bytes(identifier): @@ -631,15 +639,8 @@ _object_type_map = { } } -PERSISTENT_IDENTIFIER_TYPES = ['ori', 'snp', 'rel', 'rev', 'dir', 'cnt'] - -PERSISTENT_IDENTIFIER_KEYS = [ - 'namespace', 'scheme_version', 'object_type', 'object_id', 'metadata'] - -PERSISTENT_IDENTIFIER_PARTS_SEP = ';' - -class PersistentId(namedtuple('PersistentId', PERSISTENT_IDENTIFIER_KEYS)): +class PersistentId(namedtuple('PersistentId', PID_KEYS)): """ Named tuple holding the relevant info associated to a Software Heritage persistent identifier. @@ -680,7 +681,7 @@ class PersistentId(namedtuple('PersistentId', PERSISTENT_IDENTIFIER_KEYS)): """ __slots__ = () - def __new__(cls, namespace='swh', scheme_version=1, + def __new__(cls, namespace=PID_NAMESPACE, scheme_version=PID_VERSION, object_type='', object_id='', metadata={}): o = _object_type_map.get(object_type) if not o: @@ -696,11 +697,11 @@ class PersistentId(namedtuple('PersistentId', PERSISTENT_IDENTIFIER_KEYS)): def __str__(self): o = _object_type_map.get(self.object_type) - pid = '%s:%s:%s:%s' % (self.namespace, self.scheme_version, - o['short_name'], self.object_id) + pid = PID_SEP.join([self.namespace, str(self.scheme_version), + o['short_name'], self.object_id]) if self.metadata: for k, v in self.metadata.items(): - pid += '%s%s=%s' % (PERSISTENT_IDENTIFIER_PARTS_SEP, k, v) + pid += '%s%s=%s' % (PID_CTXT_SEP, k, v) return pid @@ -755,7 +756,7 @@ def parse_persistent_identifier(persistent_id): """ # <pid>;<contextual-information> - persistent_id_parts = persistent_id.split(PERSISTENT_IDENTIFIER_PARTS_SEP) + persistent_id_parts = persistent_id.split(PID_CTXT_SEP) pid_data = persistent_id_parts.pop(0).split(':') if len(pid_data) != 4: @@ -764,17 +765,17 @@ def parse_persistent_identifier(persistent_id): # Checking for parsing errors _ns, _version, _type, _id = pid_data - if _ns != 'swh': + if _ns != PID_NAMESPACE: raise ValidationError( - 'Wrong format: Supported namespace is \'swh\'') + "Wrong format: only supported namespace is '%s'" % PID_NAMESPACE) - if _version != '1': + if _version != str(PID_VERSION): raise ValidationError( - 'Wrong format: Supported version is 1') + 'Wrong format: only supported version is %d' % PID_VERSION) pid_data[1] = int(pid_data[1]) - expected_types = PERSISTENT_IDENTIFIER_TYPES + expected_types = PID_TYPES if _type not in expected_types: raise ValidationError( 'Wrong format: Supported types are %s' % ( diff --git a/swh/model/model.py b/swh/model/model.py index 3ee19a83a89e5af67ef7a5a030abc76343ede1ee..fc5a21fccce4fab6127fdff88d393197505240e1 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -214,19 +214,15 @@ class Release(BaseModel): id = attr.ib(type=Sha1Git) name = attr.ib(type=bytes) message = attr.ib(type=bytes) - target = attr.ib(type=Optional[Sha1Git], - validator=attr.validators.optional([])) + target = attr.ib(type=Optional[Sha1Git]) target_type = attr.ib(type=ObjectType) synthetic = attr.ib(type=bool) author = attr.ib(type=Optional[Person], - default=None, - validator=attr.validators.optional([])) + default=None) date = attr.ib(type=Optional[TimestampWithTimezone], - default=None, - validator=attr.validators.optional([])) + default=None) metadata = attr.ib(type=Optional[Dict[str, object]], - default=None, - validator=attr.validators.optional([])) + default=None) @author.validator def check_author(self, attribute, value): @@ -274,8 +270,7 @@ class Revision(BaseModel): directory = attr.ib(type=Sha1Git) synthetic = attr.ib(type=bool) metadata = attr.ib(type=Optional[Dict[str, object]], - default=None, - validator=attr.validators.optional([])) + default=None) parents = attr.ib(type=List[Sha1Git], default=attr.Factory(list)) @@ -343,11 +338,9 @@ class Content(BaseModel): type=str, validator=attr.validators.in_(['visible', 'absent', 'hidden'])) reason = attr.ib(type=Optional[str], - default=None, - validator=attr.validators.optional([])) + default=None) data = attr.ib(type=Optional[bytes], - default=None, - validator=attr.validators.optional([])) + default=None) ctime = attr.ib(type=Optional[datetime.datetime], default=None) diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py index e4232fe2c4b4a70aeac2fb8991ab883e6b83b98a..7f70b46d119192790d2e6c4f1740f30cefbf1a82 100644 --- a/swh/model/tests/test_cli.py +++ b/swh/model/tests/test_cli.py @@ -45,6 +45,13 @@ class TestIdentify(DataMixin, unittest.TestCase): self.assertPidOK(result, 'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759') + def test_origin_id(self): + """identify an origin URL""" + url = 'https://github.com/torvalds/linux' + result = self.runner.invoke(cli.identify, ['--type', 'origin', url]) + self.assertPidOK(result, + 'swh:1:ori:b63a575fe3faab7692c9f38fb09d4bb45651bb0f') + def test_symlink(self): """identify symlink --- both itself and target""" regular = os.path.join(self.tmpdir_name, b'foo.txt') @@ -84,18 +91,27 @@ class TestIdentify(DataMixin, unittest.TestCase): self.assertPidOK(result, 'swh:1:cnt:' + hash_to_hex(content['sha1_git'])) - def test_auto_id(self): - """automatic object type: file or directory, depending on argument""" + def test_auto_content(self): + """automatic object type detection: content""" with tempfile.NamedTemporaryFile(prefix='swh.model.cli') as f: result = self.runner.invoke(cli.identify, [f.name]) self.assertEqual(result.exit_code, 0) self.assertRegex(result.output, r'^swh:\d+:cnt:') + def test_auto_directory(self): + """automatic object type detection: directory""" with tempfile.TemporaryDirectory(prefix='swh.model.cli') as dirname: result = self.runner.invoke(cli.identify, [dirname]) self.assertEqual(result.exit_code, 0) self.assertRegex(result.output, r'^swh:\d+:dir:') + def test_auto_origin(self): + """automatic object type detection: origin""" + result = self.runner.invoke(cli.identify, + ['https://github.com/torvalds/linux']) + self.assertEqual(result.exit_code, 0) + self.assertRegex(result.output, r'^swh:\d+:ori:') + def test_verify_content(self): """identifier verification""" self.make_contents(self.tmpdir_name) diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 410cf402140600feada6502ddd0dc15dfdcfe2a4..83294d5a3b835ad54782e3cad1f526138aeae33b 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -10,7 +10,7 @@ import unittest from swh.model import hashutil, identifiers from swh.model.exceptions import ValidationError from swh.model.identifiers import (CONTENT, DIRECTORY, - PERSISTENT_IDENTIFIER_TYPES, RELEASE, + PID_TYPES, RELEASE, REVISION, SNAPSHOT, PersistentId) @@ -876,12 +876,12 @@ class SnapshotIdentifier(unittest.TestCase): ('swh:1:cnt:', 'Wrong format: Identifier should be present'), ('foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505', - 'Wrong format: Supported namespace is \'swh\''), + 'Wrong format: only supported namespace is \'swh\''), ('swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505', - 'Wrong format: Supported version is 1'), + 'Wrong format: only supported version is 1'), ('swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505', 'Wrong format: Supported types are %s' % ( - ', '.join(PERSISTENT_IDENTIFIER_TYPES))), + ', '.join(PID_TYPES))), ('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;' 'malformed', 'Contextual data is badly formatted, form key=val expected'), diff --git a/version.txt b/version.txt index 29db2c20c626155010e014246ba22dd264740ee4..9142329e4fcf6bd4d6fbbcf74b6bdbd3e13f9ae4 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.42-0-g6df68b0 \ No newline at end of file +v0.0.43-0-gfd2e6da \ No newline at end of file