diff --git a/PKG-INFO b/PKG-INFO index 1bf621f6cf331eb6c98f55fa0030bf42fd63a06f..de02b1c34cd05d40ab6a3ceb9596d41fdc775e96 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.59 +Version: 0.0.60 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 1bf621f6cf331eb6c98f55fa0030bf42fd63a06f..de02b1c34cd05d40ab6a3ceb9596d41fdc775e96 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.59 +Version: 0.0.60 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py index a5dd4f11ae071a5a20dffed02ae2e9130e1d8f54..6fe27b79f52c0c8ad8397f460d83dd19b613e399 100644 --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -7,11 +7,11 @@ import attr import datetime from hypothesis.strategies import ( - binary, builds, characters, composite, dictionaries, from_regex, - integers, just, lists, none, one_of, sampled_from, text, tuples, + binary, builds, characters, composite, dictionaries, + from_regex, integers, just, lists, none, one_of, + sampled_from, sets, text, tuples, ) - from .from_disk import DentryPerms from .model import ( Person, Timestamp, TimestampWithTimezone, Origin, OriginVisit, @@ -139,33 +139,33 @@ def contents(): return one_of(present_contents(), skipped_contents()) -@composite -def present_contents(draw): - return draw(builds( - Content, - length=integers(min_value=0, max_value=2**63-1), - sha1=sha1(), - sha1_git=sha1_git(), - sha256=binary(min_size=32, max_size=32), - blake2s256=binary(min_size=32, max_size=32), +def present_contents(): + return builds( + Content.from_data, + binary(max_size=4096), status=one_of(just('visible'), just('hidden')), - data=binary(), - )) + ) @composite def skipped_contents(draw): - return draw(builds( - SkippedContent, - length=integers(min_value=-1, max_value=2**63-1), - sha1=optional(sha1()), - sha1_git=optional(sha1_git()), - sha256=optional(binary(min_size=32, max_size=32)), - blake2s256=optional(binary(min_size=32, max_size=32)), - status=just('absent'), + nullify_attrs = draw( + sets(sampled_from(['sha1', 'sha1_git', 'sha256', 'blake2s256'])) + ) + + new_attrs = { + k: None + for k in nullify_attrs + } + + ret = draw(builds( + SkippedContent.from_data, + binary(max_size=4096), reason=pgsql_text(), )) + return attr.evolve(ret, **new_attrs) + def branch_names(): return binary(min_size=1) diff --git a/swh/model/model.py b/swh/model/model.py index aff5a7d64a4d840710e66b0fe323395fab0b5c8d..a3809f9d08ab807e1c715b4ee7e210528b9315c7 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -17,7 +17,7 @@ from .identifiers import ( normalize_timestamp, directory_identifier, revision_identifier, release_identifier, snapshot_identifier ) -from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes +from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash class MissingData(Exception): @@ -88,6 +88,45 @@ class Person(BaseModel): name = attr.ib(type=Optional[bytes]) email = attr.ib(type=Optional[bytes]) + @classmethod + def from_fullname(cls, fullname: bytes): + """Returns a Person object, by guessing the name and email from the + fullname, in the `name <email>` format. + + The fullname is left unchanged.""" + if fullname is None: + raise TypeError('fullname is None.') + + name: Optional[bytes] + email: Optional[bytes] + + try: + open_bracket = fullname.index(b'<') + except ValueError: + name = fullname + email = None + else: + raw_name = fullname[:open_bracket] + raw_email = fullname[open_bracket+1:] + + if not raw_name: + name = None + else: + name = raw_name.strip() + + try: + close_bracket = raw_email.rindex(b'>') + except ValueError: + email = raw_email + else: + email = raw_email[:close_bracket] + + return Person( + name=name or None, + email=email or None, + fullname=fullname, + ) + @attr.s(frozen=True) class Timestamp(BaseModel): @@ -390,6 +429,15 @@ class BaseContent(BaseModel): type=str, validator=attr.validators.in_(['visible', 'hidden', 'absent'])) + @staticmethod + def _hash_data(data: bytes): + """Hash some data, returning most of the fields of a content object""" + d = MultiHash.from_data(data).digest() + d['data'] = data + d['length'] = len(data) + + return d + def to_dict(self): content = super().to_dict() if content['ctime'] is None: @@ -448,6 +496,17 @@ class Content(BaseContent): del content['data'] return content + @classmethod + def from_data(cls, data, status='visible') -> 'Content': + """Generate a Content from a given `data` byte string. + + This populates the Content with the hashes and length for the data + passed as argument, as well as the data itself. + """ + d = cls._hash_data(data) + d['status'] = status + return cls(**d) + @classmethod def from_dict(cls, d): return super().from_dict(d, use_subclass=False) @@ -503,6 +562,22 @@ class SkippedContent(BaseContent): del content['origin'] return content + @classmethod + def from_data(cls, data, reason: str) -> 'SkippedContent': + """Generate a SkippedContent from a given `data` byte string. + + This populates the SkippedContent with the hashes and length for the + data passed as argument. + + You can use `attr.evolve` on such a generated content to nullify some + of its attributes, e.g. for tests. + """ + d = cls._hash_data(data) + del d['data'] + d['status'] = 'absent' + d['reason'] = reason + return cls(**d) + @classmethod def from_dict(cls, d): d2 = d diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py index a97c3926b7c3d500b3201ff63fee6c312b755790..8bffa80b491f65e98a51618fb1d5064f35bab15e 100644 --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -7,14 +7,15 @@ import copy import datetime from hypothesis import given +from hypothesis.strategies import binary import pytest from swh.model.model import ( - Content, Directory, Revision, Release, Snapshot, + Content, SkippedContent, Directory, Revision, Release, Snapshot, Timestamp, TimestampWithTimezone, - MissingData, + MissingData, Person ) -from swh.model.hashutil import hash_to_bytes +from swh.model.hashutil import hash_to_bytes, MultiHash from swh.model.hypothesis_strategies import objects, origins, origin_visits from swh.model.identifiers import ( directory_identifier, revision_identifier, release_identifier, @@ -107,6 +108,96 @@ def test_timestampwithtimezone_from_iso8601_negative_utc(): ) +def test_person_from_fullname(): + """The author should have name, email and fullname filled. + + """ + actual_person = Person.from_fullname(b'tony <ynot@dagobah>') + assert actual_person == Person( + fullname=b'tony <ynot@dagobah>', + name=b'tony', + email=b'ynot@dagobah', + ) + + +def test_person_from_fullname_no_email(): + """The author and fullname should be the same as the input (author). + + """ + actual_person = Person.from_fullname(b'tony') + assert actual_person == Person( + fullname=b'tony', + name=b'tony', + email=None, + ) + + +def test_person_from_fullname_empty_person(): + """Empty person has only its fullname filled with the empty + byte-string. + + """ + actual_person = Person.from_fullname(b'') + assert actual_person == Person( + fullname=b'', + name=None, + email=None, + ) + + +def test_git_author_line_to_author(): + # edge case out of the way + with pytest.raises(TypeError): + Person.from_fullname(None) + + tests = { + b'a <b@c.com>': Person( + name=b'a', + email=b'b@c.com', + fullname=b'a <b@c.com>', + ), + b'<foo@bar.com>': Person( + name=None, + email=b'foo@bar.com', + fullname=b'<foo@bar.com>', + ), + b'malformed <email': Person( + name=b'malformed', + email=b'email', + fullname=b'malformed <email' + ), + b'malformed <"<br"@ckets>': Person( + name=b'malformed', + email=b'"<br"@ckets', + fullname=b'malformed <"<br"@ckets>', + ), + b'trailing <sp@c.e> ': Person( + name=b'trailing', + email=b'sp@c.e', + fullname=b'trailing <sp@c.e> ', + ), + b'no<sp@c.e>': Person( + name=b'no', + email=b'sp@c.e', + fullname=b'no<sp@c.e>', + ), + b' more <sp@c.es>': Person( + name=b'more', + email=b'sp@c.es', + fullname=b' more <sp@c.es>', + ), + b' <>': Person( + name=None, + email=None, + fullname=b' <>', + ), + } + + for person in sorted(tests): + expected_person = tests[person] + assert expected_person == Person.from_fullname(person) + + def test_content_get_hash(): hashes = dict( sha1=b'foo', sha1_git=b'bar', sha256=b'baz', blake2s256=b'qux') @@ -137,6 +228,36 @@ def test_content_data_missing(): c.with_data() +@given(binary(max_size=4096)) +def test_content_from_data(data): + c = Content.from_data(data) + assert c.data == data + assert c.length == len(data) + assert c.status == 'visible' + for key, value in MultiHash.from_data(data).digest().items(): + assert getattr(c, key) == value + + +@given(binary(max_size=4096)) +def test_hidden_content_from_data(data): + c = Content.from_data(data, status='hidden') + assert c.data == data + assert c.length == len(data) + assert c.status == 'hidden' + for key, value in MultiHash.from_data(data).digest().items(): + assert getattr(c, key) == value + + +@given(binary(max_size=4096)) +def test_skipped_content_from_data(data): + c = SkippedContent.from_data(data, reason='reason') + assert c.reason == 'reason' + assert c.length == len(data) + assert c.status == 'absent' + for key, value in MultiHash.from_data(data).digest().items(): + assert getattr(c, key) == value + + def test_directory_model_id_computation(): dir_dict = dict(directory_example) del dir_dict['id'] diff --git a/version.txt b/version.txt index 701b99b5360f9bddabbb8c41f73545f84d0f1ff6..ed7808504f726444e81919a73be3543107695917 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.59-0-gcb075eb \ No newline at end of file +v0.0.60-0-ga5a9f57 \ No newline at end of file