diff --git a/PKG-INFO b/PKG-INFO index bc16f4c8c7ebb08c67566494e31ed46b024a646f..ddd9d07bf0c0d35cfe3a999bf15e7ea679f38430 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.49 +Version: 0.0.50 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers @@ -35,5 +35,5 @@ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown -Provides-Extra: cli Provides-Extra: testing +Provides-Extra: cli diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index bc16f4c8c7ebb08c67566494e31ed46b024a646f..ddd9d07bf0c0d35cfe3a999bf15e7ea679f38430 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.49 +Version: 0.0.50 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers @@ -35,5 +35,5 @@ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown -Provides-Extra: cli Provides-Extra: testing +Provides-Extra: cli diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt index 04bf4380c214574dcb3adb3f8848ca750233e2e3..4e69253ca8ad3c0e9e10a71c98dccbcf599c4788 100644 --- a/swh.model.egg-info/SOURCES.txt +++ b/swh.model.egg-info/SOURCES.txt @@ -28,9 +28,11 @@ swh/model/fields/compound.py swh/model/fields/hashes.py swh/model/fields/simple.py swh/model/tests/__init__.py +swh/model/tests/generate_testdata.py swh/model/tests/generate_testdata_from_disk.py swh/model/tests/test_cli.py swh/model/tests/test_from_disk.py +swh/model/tests/test_generate_testdata.py swh/model/tests/test_hashutil.py swh/model/tests/test_hypothesis_strategies.py swh/model/tests/test_identifiers.py diff --git a/swh.model.egg-info/requires.txt b/swh.model.egg-info/requires.txt index 88a6ba3df5f19879e693e8fe9f62e4d4b8aada06..718cd8a1eee84fa9efce95ccfd41e2aff20ec815 100644 --- a/swh.model.egg-info/requires.txt +++ b/swh.model.egg-info/requires.txt @@ -14,3 +14,4 @@ dulwich Click dulwich pytest +pytz diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py index 3a205a3ca779def557ab2eb11b8d615b8e9d2c8e..ca568ee7e909421192f1cb9ae62111ca86ea5670 100644 --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import attr import datetime from hypothesis.strategies import ( @@ -94,9 +95,10 @@ def releases(draw): author=none(), date=none(), target=sha1_git())) - rel.date = date - rel.author = author - return rel + return attr.evolve( + rel, + date=date, + author=author) def revision_metadata(): diff --git a/swh/model/model.py b/swh/model/model.py index 217e3632b6d2976a55b90f54f581ccf68bcb7776..baf4f54110b860e6967de9871065a747acd0a9e7 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -51,7 +51,7 @@ class BaseModel: return cls(**d) -@attr.s +@attr.s(frozen=True) class Person(BaseModel): """Represents the author/committer of a revision or release.""" name = attr.ib(type=bytes) @@ -59,7 +59,7 @@ class Person(BaseModel): fullname = attr.ib(type=bytes) -@attr.s +@attr.s(frozen=True) class Timestamp(BaseModel): """Represents a naive timestamp from a VCS.""" seconds = attr.ib(type=int) @@ -78,7 +78,7 @@ class Timestamp(BaseModel): raise ValueError('Microseconds must be in [0, 1000000[.') -@attr.s +@attr.s(frozen=True) class TimestampWithTimezone(BaseModel): """Represents a TZ-aware timestamp from a VCS.""" timestamp = attr.ib(type=Timestamp) @@ -105,7 +105,7 @@ class TimestampWithTimezone(BaseModel): negative_utc=d['negative_utc']) -@attr.s +@attr.s(frozen=True) class Origin(BaseModel): """Represents a software source: a VCS and an URL.""" url = attr.ib(type=str) @@ -117,7 +117,7 @@ class Origin(BaseModel): return r -@attr.s +@attr.s(frozen=True) class OriginVisit(BaseModel): """Represents a visit of an origin at a given point in time, by a SWH loader.""" @@ -176,7 +176,7 @@ class ObjectType(Enum): SNAPSHOT = 'snapshot' -@attr.s +@attr.s(frozen=True) class SnapshotBranch(BaseModel): """Represents one of the branches of a snapshot.""" target = attr.ib(type=bytes) @@ -198,7 +198,7 @@ class SnapshotBranch(BaseModel): target_type=TargetType(d['target_type'])) -@attr.s +@attr.s(frozen=True) class Snapshot(BaseModel): """Represents the full state of an origin at a given point in time.""" id = attr.ib(type=Sha1Git) @@ -214,7 +214,7 @@ class Snapshot(BaseModel): }) -@attr.s +@attr.s(frozen=True) class Release(BaseModel): id = attr.ib(type=Sha1Git) name = attr.ib(type=bytes) @@ -261,7 +261,7 @@ class RevisionType(Enum): MERCURIAL = 'hg' -@attr.s +@attr.s(frozen=True) class Revision(BaseModel): id = attr.ib(type=Sha1Git) message = attr.ib(type=bytes) @@ -291,7 +291,7 @@ class Revision(BaseModel): **d) -@attr.s +@attr.s(frozen=True) class DirectoryEntry(BaseModel): name = attr.ib(type=bytes) type = attr.ib(type=str, @@ -301,7 +301,7 @@ class DirectoryEntry(BaseModel): """Usually one of the values of `swh.model.from_disk.DentryPerms`.""" -@attr.s +@attr.s(frozen=True) class Directory(BaseModel): id = attr.ib(type=Sha1Git) entries = attr.ib(type=List[DirectoryEntry]) @@ -314,7 +314,7 @@ class Directory(BaseModel): for entry in d['entries']]) -@attr.s +@attr.s(frozen=True) class Content(BaseModel): sha1 = attr.ib(type=bytes) sha1_git = attr.ib(type=Sha1Git) diff --git a/swh/model/tests/generate_testdata.py b/swh/model/tests/generate_testdata.py new file mode 100644 index 0000000000000000000000000000000000000000..a495d55032c2de3853c1d6e36abfb76295d0a33d --- /dev/null +++ b/swh/model/tests/generate_testdata.py @@ -0,0 +1,66 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from datetime import datetime +from pytz import all_timezones, timezone +from random import choice, randint, random, shuffle +from typing import List, Dict + +from swh.model.hashutil import MultiHash + + +PROTOCOLS = ['git', 'http', 'https', 'deb', 'svn', 'mock'] +DOMAINS = ['example.com', 'some.long.host.name', 'xn--n28h.tld'] +PATHS = ['', '/', '/stuff', '/stuff/', + '/path/to/resource', + '/path/with/anchor#id=42', + '/path/with/qargs?q=1&b'] +CONTENT_STATUS = ['visible', 'hidden', 'absent'] +MAX_DATE = 3e9 # around 2065 + + +def gen_all_origins(): + for protocol in PROTOCOLS: + for domain in DOMAINS: + for urlpath in PATHS: + yield {'url': '%s://%s%s' % (protocol, domain, urlpath)} + + +ORIGINS = list(gen_all_origins()) + + +def gen_origins(n: int = 100) -> List: + """Returns a list of n randomly generated origins suitable for using as + Storage.add_origin() argument. + + """ + origins = ORIGINS[:] + shuffle(origins) + return origins[:n] + + +def gen_content(): + size = randint(1, 10 * 1024) + data = bytes(randint(0, 255) for i in range(size)) + status = choice(CONTENT_STATUS) + h = MultiHash.from_data(data) + ctime = datetime.fromtimestamp( + random() * MAX_DATE, timezone(choice(all_timezones))) + content = {'data': data, + 'status': status, + 'length': size, + 'ctime': ctime, + **h.digest()} + if status == 'absent': + content['reason'] = 'why not' + content['data'] = b'' + return content + + +def gen_contents(n=20) -> List[Dict]: + """Returns a list of n randomly generated content objects (as dict) suitable + for using as Storage.content_add() argument. + """ + return [gen_content() for i in range(n)] diff --git a/swh/model/tests/test_generate_testdata.py b/swh/model/tests/test_generate_testdata.py new file mode 100644 index 0000000000000000000000000000000000000000..60ee24623c1a8c6d2048b3629abe4fdded67a3e6 --- /dev/null +++ b/swh/model/tests/test_generate_testdata.py @@ -0,0 +1,54 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from .generate_testdata import gen_contents, gen_origins, ORIGINS + +from swh.model.model import Origin, Content + + +def test_gen_origins_empty(): + origins = gen_origins(0) + assert not origins + + +def test_gen_origins_one(): + origins = gen_origins(1) + assert len(origins) == 1 + assert [Origin.from_dict(d) for d in origins] + + +def test_gen_origins_default(): + origins = gen_origins() + assert len(origins) == 100 + models = [Origin.from_dict(d).url for d in origins] + assert len(origins) == len(set(models)) + + +def test_gen_origins_max(): + nmax = len(ORIGINS) + origins = gen_origins(nmax+1) + assert len(origins) == nmax + models = {Origin.from_dict(d).url for d in origins} + # ensure we did not generate the same origin twice + assert len(origins) == len(models) + + +def test_gen_contents_empty(): + contents = gen_contents(0) + assert not contents + + +def test_gen_contents_one(): + contents = gen_contents(1) + assert len(contents) == 1 + assert [Content.from_dict(d) for d in contents] + + +def test_gen_contents_default(): + contents = gen_contents() + assert len(contents) == 20 + models = {Content.from_dict(d) for d in contents} + # ensure we did not generate the same content twice + assert len(contents) == len(models) diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py index b2cc3edc5fa9c2a6cb361942b9ed9f56a6b83ca0..2900cd1f9fd6862a4bbd605e1e51fccec50cc6b6 100644 --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -3,6 +3,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import attr import copy from hypothesis import given @@ -44,8 +45,9 @@ def test_todict_origin_visits(origin_visit): obj = origin_visit.to_dict() assert 'type' not in obj['origin'] - origin_visit.origin.type = None - assert origin_visit == type(origin_visit).from_dict(obj) + origin2 = attr.evolve(origin_visit.origin, type=None) + origin_visit2 = attr.evolve(origin_visit, origin=origin2) + assert origin_visit2 == type(origin_visit).from_dict(obj) def test_content_get_hash(): diff --git a/version.txt b/version.txt index 82adc2c9a903d7d7f42b49a983552ae720e5a8d2..87d9c2b83fa7ea845ba38cd3d2faa3a4130d640a 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.49-0-g4b79a2b \ No newline at end of file +v0.0.50-0-gb064a0b \ No newline at end of file