Skip to content
Snippets Groups Projects
Commit 2c626eee authored by Jenkins for Software Heritage's avatar Jenkins for Software Heritage
Browse files

New upstream version 0.0.48

parents 2c116483 b2c21d32
No related branches found
Tags debian/upstream/0.0.48
No related merge requests found
Metadata-Version: 2.1
Name: swh.model
Version: 0.0.47
Version: 0.0.48
Summary: Software Heritage data model
Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
Author: Software Heritage developers
......@@ -36,3 +36,4 @@ Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Description-Content-Type: text/markdown
Provides-Extra: testing
Provides-Extra: cli
......@@ -2,7 +2,6 @@
# should match https://pypi.python.org/pypi names. For the full spec or
# dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
vcversioner
Click
attrs
hypothesis
python-dateutil
......@@ -49,7 +49,10 @@ setup(
setup_requires=['vcversioner'],
install_requires=(parse_requirements() + parse_requirements('swh') +
blake2_requirements),
extras_require={'testing': parse_requirements('test')},
extras_require={
'cli': parse_requirements('cli'),
'testing': parse_requirements('test'),
},
vcversioner={},
include_package_data=True,
entry_points='''
......
Metadata-Version: 2.1
Name: swh.model
Version: 0.0.47
Version: 0.0.48
Summary: Software Heritage data model
Home-page: https://forge.softwareheritage.org/diffusion/DMOD/
Author: Software Heritage developers
......@@ -36,3 +36,4 @@ Classifier: Operating System :: OS Independent
Classifier: Development Status :: 5 - Production/Stable
Description-Content-Type: text/markdown
Provides-Extra: testing
Provides-Extra: cli
......@@ -39,6 +39,7 @@ swh/model/tests/test_model.py
swh/model/tests/test_toposort.py
swh/model/tests/test_validators.py
swh/model/tests/data/dir-folders/sample-folder.tgz
swh/model/tests/data/repos/sample-repo.tgz
swh/model/tests/fields/__init__.py
swh/model/tests/fields/test_compound.py
swh/model/tests/fields/test_hashes.py
......
vcversioner
Click
attrs
hypothesis
python-dateutil
......@@ -7,5 +6,11 @@ python-dateutil
[:python_version < "3.6"]
pyblake2
[cli]
Click
dulwich
[testing]
Click
dulwich
pytest
......@@ -4,12 +4,14 @@
# See top-level LICENSE file for more information
import click
import dulwich.repo
import os
import sys
from functools import partial
from urllib.parse import urlparse
from swh.model import hashutil
from swh.model import identifiers as pids
from swh.model.exceptions import ValidationError
from swh.model.from_disk import Content, Directory
......@@ -17,6 +19,15 @@ from swh.model.from_disk import Content, Directory
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
# Mapping between dulwich types and Software Heritage ones. Used by snapshot ID
# computation.
# Keys are the byte-string type names that pid_of_git_repo() reads from
# ``obj.type_name``; values are the strings it stores as a branch
# 'target_type'.
_DULWICH_TYPES = {
b'blob': 'content',
b'tree': 'directory',
b'commit': 'revision',
b'tag': 'release',
}
class PidParamType(click.ParamType):
name = 'persistent identifier'
......@@ -45,6 +56,26 @@ def pid_of_origin(url):
return str(pid)
def pid_of_git_repo(path):
    """Compute the snapshot persistent identifier of a local Git repository.

    Every reference of the repository becomes a snapshot branch pointing at
    the referenced object. A reference whose target object cannot be loaded
    from the repository is recorded with a ``None`` branch value instead of
    aborting the whole computation.

    Args:
        path: location of the Git repository, passed as-is to
            ``dulwich.repo.Repo``.

    Returns:
        The snapshot persistent identifier, as a string.
    """
    repo = dulwich.repo.Repo(path)
    try:
        branches = {}
        for ref, target in repo.refs.as_dict().items():
            try:
                obj = repo[target]
            except KeyError:
                # dulwich signals a missing object with KeyError; treat the
                # reference like any other one without a usable target
                obj = None
            if obj:
                branches[ref] = {
                    'target': hashutil.bytehex_to_hash(target),
                    'target_type': _DULWICH_TYPES[obj.type_name],
                }
            else:
                branches[ref] = None
    finally:
        # release the file handles held by the repository object
        repo.close()

    snapshot = {'branches': branches}

    pid = pids.PersistentId(object_type='snapshot',
                            object_id=pids.snapshot_identifier(snapshot))
    return str(pid)
def identify_object(obj_type, follow_symlinks, obj):
if obj_type == 'auto':
if os.path.isfile(obj):
......@@ -73,6 +104,8 @@ def identify_object(obj_type, follow_symlinks, obj):
pid = pid_of_dir(path)
elif obj_type == 'origin':
pid = pid_of_origin(obj)
elif obj_type == 'snapshot':
pid = pid_of_git_repo(obj)
else: # shouldn't happen, due to option validation
raise click.BadParameter('invalid object type: ' + obj_type)
......@@ -89,7 +122,8 @@ def identify_object(obj_type, follow_symlinks, obj):
@click.option('--filename/--no-filename', 'show_filename', default=True,
help='show/hide file name (default: show)')
@click.option('--type', '-t', 'obj_type', default='auto',
type=click.Choice(['auto', 'content', 'directory', 'origin']),
type=click.Choice(['auto', 'content', 'directory', 'origin',
'snapshot']),
help='type of object to identify (default: auto)')
@click.option('--verify', '-v', metavar='PID', type=PidParamType(),
help='reference identifier to be compared with computed one')
......@@ -116,7 +150,12 @@ def identify(obj_type, verify, show_filename, follow_symlinks, objects):
$ swh identify --no-filename /usr/src/linux/kernel/
swh:1:dir:f9f858a48d663b3809c9e2f336412717496202ab
"""
\b
$ git clone --mirror https://forge.softwareheritage.org/source/helloworld.git
$ swh identify --type snapshot helloworld.git/
swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93 helloworld.git
""" # NoQA # overlong lines in shell examples are fine
if verify and len(objects) != 1:
raise click.BadParameter('verification requires a single object')
......
......@@ -606,7 +606,11 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False):
def origin_identifier(origin):
    """Return the intrinsic identifier for an origin.

    An origin's identifier is the sha1 checksum of the entire origin URL.

    Args:
        origin: mapping with a ``url`` key holding the origin URL as a string.

    Returns:
        The hexadecimal sha1 digest of the encoded URL.
    """
    # UTF-8 produces the same bytes as ASCII for ASCII-only URLs (so existing
    # identifiers are unchanged) but does not raise UnicodeEncodeError when
    # the URL contains non-ASCII characters.
    return hashlib.sha1(origin['url'].encode('utf-8')).hexdigest()
......@@ -695,6 +699,13 @@ class PersistentId(_PersistentId):
if not o:
raise ValidationError('Wrong input: Supported types are %s' % (
list(_object_type_map.keys())))
if namespace != PID_NAMESPACE:
raise ValidationError(
"Wrong format: only supported namespace is '%s'"
% PID_NAMESPACE)
if scheme_version != PID_VERSION:
raise ValidationError(
'Wrong format: only supported version is %d' % PID_VERSION)
# internal swh representation resolution
if isinstance(object_id, dict):
object_id = object_id[o['key_id']]
......@@ -773,22 +784,8 @@ def parse_persistent_identifier(persistent_id):
# Checking for parsing errors
_ns, _version, _type, _id = pid_data
if _ns != PID_NAMESPACE:
raise ValidationError(
"Wrong format: only supported namespace is '%s'" % PID_NAMESPACE)
if _version != str(PID_VERSION):
raise ValidationError(
'Wrong format: only supported version is %d' % PID_VERSION)
pid_data[1] = int(pid_data[1])
expected_types = PID_TYPES
if _type not in expected_types:
raise ValidationError(
'Wrong format: Supported types are %s' % (
', '.join(expected_types)))
for otype, data in _object_type_map.items():
if _type == data['short_name']:
pid_data[2] = otype
......@@ -798,12 +795,6 @@ def parse_persistent_identifier(persistent_id):
raise ValidationError(
'Wrong format: Identifier should be present')
try:
validate_sha1(_id)
except ValidationError:
raise ValidationError(
'Wrong format: Identifier should be a valid hash')
persistent_id_metadata = {}
for part in persistent_id_parts:
try:
......@@ -813,4 +804,4 @@ def parse_persistent_identifier(persistent_id):
msg = 'Contextual data is badly formatted, form key=val expected'
raise ValidationError(msg)
pid_data.append(persistent_id_metadata)
return PersistentId._make(pid_data)
return PersistentId(*pid_data)
......@@ -97,6 +97,11 @@ class Origin(BaseModel):
url = attr.ib(type=str)
type = attr.ib(type=Optional[str], default=None)
def to_dict(self):
    """Return the parent class' dict form of this origin, minus 'type'."""
    serialized = super().to_dict()
    # the 'type' entry may be absent, so drop it only when it is there
    if 'type' in serialized:
        del serialized['type']
    return serialized
@attr.s
class OriginVisit(BaseModel):
......@@ -122,6 +127,7 @@ class OriginVisit(BaseModel):
ov = super().to_dict()
if ov['visit'] is None:
del ov['visit']
ov['origin'] = self.origin.to_dict()
return ov
@classmethod
......
File added
# Copyright (C) 2018 The Software Heritage developers
# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import tarfile
import tempfile
import unittest
......@@ -45,6 +46,19 @@ class TestIdentify(DataMixin, unittest.TestCase):
self.assertPidOK(result,
'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759')
def test_snapshot_id(self):
    """identify a snapshot"""
    expected_pid = 'swh:1:snp:9dc0fc035aabe293f5faf6c362a59513454a170d'
    tests_dir = os.path.dirname(__file__)
    archive_path = os.path.join(tests_dir, 'data', 'repos',
                                'sample-repo.tgz')
    with tempfile.TemporaryDirectory(prefix='swh.model.cli') as workdir:
        with tarfile.open(archive_path, 'r:gz') as archive:
            # unpack the sample repository into the throwaway directory
            archive.extractall(workdir)
            repo_dir = os.path.join(workdir, 'sample-repo')
            cli_args = ['--type', 'snapshot', repo_dir]
            result = self.runner.invoke(cli.identify, cli_args)
            self.assertPidOK(result, expected_pid)
def test_origin_id(self):
"""identify an origin URL"""
url = 'https://github.com/torvalds/linux'
......
......@@ -10,8 +10,8 @@ import unittest
from swh.model import hashutil, identifiers
from swh.model.exceptions import ValidationError
from swh.model.identifiers import (CONTENT, DIRECTORY,
PID_TYPES, RELEASE,
REVISION, SNAPSHOT, PersistentId)
RELEASE, REVISION,
SNAPSHOT, PersistentId)
class UtilityFunctionsIdentifier(unittest.TestCase):
......@@ -768,8 +768,8 @@ class SnapshotIdentifier(unittest.TestCase):
'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453',
None, {}),
(RELEASE, _release_id,
'swh:2:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
2, {}),
'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
1, {}),
(REVISION, _revision_id,
'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d',
None, {}),
......@@ -783,8 +783,8 @@ class SnapshotIdentifier(unittest.TestCase):
'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453',
None, {}),
(RELEASE, _release,
'swh:2:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
2, {}),
'swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
1, {}),
(REVISION, _revision,
'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d',
None, {}),
......@@ -811,12 +811,12 @@ class SnapshotIdentifier(unittest.TestCase):
_snapshot_id = 'notahash4bc0bf3d81436bf980b46e98bd338453'
_snapshot = {'id': _snapshot_id}
for _type, _hash, _error in [
(SNAPSHOT, _snapshot_id, 'Unexpected characters'),
(SNAPSHOT, _snapshot, 'Unexpected characters'),
('foo', '', 'Wrong input: Supported types are'),
for _type, _hash in [
(SNAPSHOT, _snapshot_id),
(SNAPSHOT, _snapshot),
('foo', ''),
]:
with self.assertRaisesRegex(ValidationError, _error):
with self.assertRaises(ValidationError):
identifiers.persistent_identifier(_type, _hash)
def test_parse_persistent_identifier(self):
......@@ -866,34 +866,37 @@ class SnapshotIdentifier(unittest.TestCase):
self.assertEqual(actual_result, expected_result)
def test_parse_persistent_identifier_parsing_error(self):
for pid, _error in [
('swh:1:cnt',
'Wrong format: There should be 4 mandatory values'),
('swh:1:',
'Wrong format: There should be 4 mandatory values'),
('swh:',
'Wrong format: There should be 4 mandatory values'),
('swh:1:cnt:',
'Wrong format: Identifier should be present'),
('foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505',
'Wrong format: only supported namespace is \'swh\''),
('swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505',
'Wrong format: only supported version is 1'),
('swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505',
'Wrong format: Supported types are %s' % (
', '.join(PID_TYPES))),
for pid in [
('swh:1:cnt'),
('swh:1:'),
('swh:'),
('swh:1:cnt:'),
('foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505'),
('swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505'),
('swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505'),
('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;'
'malformed',
'Contextual data is badly formatted, form key=val expected'),
('swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d',
'Wrong format: Identifier should be a valid hash'),
('swh:1:snp:foo',
'Wrong format: Identifier should be a valid hash')
'malformed'),
('swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d'),
('swh:1:snp:foo'),
]:
with self.assertRaisesRegex(
ValidationError, _error):
with self.assertRaises(ValidationError):
identifiers.parse_persistent_identifier(pid)
def test_persistentid_class_validation_error(self):
    """Each field combination below must make PersistentId raise."""
    rejected_fields = [
        ('foo', 1, CONTENT, 'abc8bc9d7a6bcf6db04f476d29314f157507d505'),
        ('swh', 2, DIRECTORY, 'def8bc9d7a6bcf6db04f476d29314f157507d505'),
        ('swh', 1, 'foo', 'fed8bc9d7a6bcf6db04f476d29314f157507d505'),
        ('swh', 1, SNAPSHOT, 'gh6959356d30f1a4e9b7f6bca59b9a336464c03d'),
    ]
    for namespace, version, object_type, object_id in rejected_fields:
        kwargs = dict(
            namespace=namespace,
            scheme_version=version,
            object_type=object_type,
            object_id=object_id,
        )
        with self.assertRaises(ValidationError):
            PersistentId(**kwargs)
class OriginIdentifier(unittest.TestCase):
def setUp(self):
......
......@@ -8,12 +8,16 @@ import copy
from hypothesis import given
from swh.model.model import Content
from swh.model.hypothesis_strategies import objects
from swh.model.hypothesis_strategies import objects, origins, origin_visits
@given(objects())
def test_todict_inverse_fromdict(objtype_and_obj):
(obj_type, obj) = objtype_and_obj
if obj_type in ('origin', 'origin_visit'):
return
obj_as_dict = obj.to_dict()
obj_as_dict_copy = copy.deepcopy(obj_as_dict)
......@@ -27,6 +31,23 @@ def test_todict_inverse_fromdict(objtype_and_obj):
assert obj_as_dict == type(obj).from_dict(obj_as_dict).to_dict()
@given(origins())
def test_todict_origins(origin):
    """to_dict() drops 'type'; from_dict() of it equals a url-only origin."""
    origin_cls = type(origin)
    serialized = origin.to_dict()

    assert 'type' not in serialized

    url_only = origin_cls(url=origin.url)
    rebuilt = origin_cls.from_dict(serialized)
    assert url_only == rebuilt
@given(origin_visits())
def test_todict_origin_visits(origin_visit):
    """The nested origin is serialized without 'type' and still round-trips."""
    serialized = origin_visit.to_dict()

    nested_origin = serialized['origin']
    assert 'type' not in nested_origin

    # clear the type on the source object so it is comparable with the
    # object rebuilt from the type-less dict
    origin_visit.origin.type = None
    rebuilt = type(origin_visit).from_dict(serialized)
    assert origin_visit == rebuilt
def test_content_get_hash():
hashes = dict(
sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux')
......
v0.0.47-0-g340b001
\ No newline at end of file
v0.0.48-0-gb2c21d3
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment