diff --git a/PKG-INFO b/PKG-INFO index e55a2261039ccf24310cd0953bc662b715e7b6f3..d8810ecc802516a4b8c4ba0e5bdd877d3f692253 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 5.0.0 +Version: 6.0.0 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/pytest.ini b/pytest.ini index b15e082c739bb4a8d62e9b2be811a076339b9e0b..c5186e582bb1a58fec0cb9bb6b7f30ee4210727f 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,6 +1,6 @@ [pytest] addopts = --doctest-modules -p no:pytest_swh_core -norecursedirs = docs .* +norecursedirs = build docs .* markers = fs: tests that involve filesystem ios requires_optional_deps: tests in test_cli.py that should not run if optional dependencies are not installed diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index e55a2261039ccf24310cd0953bc662b715e7b6f3..d8810ecc802516a4b8c4ba0e5bdd877d3f692253 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 5.0.0 +Version: 6.0.0 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py index 55c7b1ee043b7a55be38acdecdaa1c663b01f9ee..7b36da641d4ce1d445257c4dca9a222015802591 100644 --- a/swh/model/git_objects.py +++ b/swh/model/git_objects.py @@ -340,10 +340,15 @@ def revision_git_object(revision: Union[Dict, model.Revision]) -> bytes: if parent: headers.append((b"parent", hash_to_bytehex(parent))) - headers.append((b"author", format_author_data(revision.author, revision.date))) - headers.append( - (b"committer", format_author_data(revision.committer, revision.committer_date),) - ) + if revision.author is not None: + headers.append((b"author", format_author_data(revision.author, revision.date))) + if revision.committer is not None: + headers.append( + ( + b"committer", + format_author_data(revision.committer, revision.committer_date), + ) + ) # Handle extra headers metadata = revision.metadata or ImmutableDict() diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py index f44e5de5115a20c502663cb9ebf29f2249dee994..54f552cd2abb27d72088269fe7d28faaf49a272f 100644 --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -217,6 +217,7 @@ def releases_d(draw): d = draw( one_of( + # None author/date: builds( dict, name=name, @@ -228,6 +229,7 @@ def releases_d(draw): target_type=target_type, metadata=metadata, ), + # non-None author/date: builds( dict, name=name, @@ -239,6 +241,8 @@ def releases_d(draw): target_type=target_type, metadata=metadata, ), + # it is also possible for date to be None but not author, but let's not + # overwhelm hypothesis with this edge case ) ) @@ -264,19 +268,39 @@ def extra_headers(): @composite def revisions_d(draw): d = draw( - builds( - dict, - message=optional(binary()), - synthetic=booleans(), - author=persons_d(), - committer=persons_d(), - date=timestamps_with_timezone_d(), - committer_date=timestamps_with_timezone_d(), - parents=tuples(sha1_git()), - directory=sha1_git(), - type=sampled_from([x.value for x in RevisionType]), - metadata=optional(revision_metadata()), - extra_headers=extra_headers(), + one_of( + # None author/committer/date/committer_date + builds( + dict, + message=optional(binary()), + synthetic=booleans(), + author=none(), + committer=none(), + date=none(), + committer_date=none(), + parents=tuples(sha1_git()), + directory=sha1_git(), + type=sampled_from([x.value for x in RevisionType]), + metadata=optional(revision_metadata()), + extra_headers=extra_headers(), + ), + # non-None author/committer/date/committer_date + builds( + dict, + message=optional(binary()), + synthetic=booleans(), + author=persons_d(), + committer=persons_d(), + date=timestamps_with_timezone_d(), + committer_date=timestamps_with_timezone_d(), + parents=tuples(sha1_git()), + directory=sha1_git(), + type=sampled_from([x.value for x in RevisionType]), + metadata=optional(revision_metadata()), + extra_headers=extra_headers(), + ), + # There are many other combinations, but let's not overwhelm hypothesis + # with these edge cases ) ) # TODO: metadata['extra_headers'] can have binary keys and values diff --git a/swh/model/model.py b/swh/model/model.py index 7bed1c37540b3ba02311968413007c7a638b9696..3eca67445fa6e7fd5be9fa860c97ed53adbd3ed9 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -289,8 +289,8 @@ class Person(BaseModel): object_type: Final = "person" fullname = attr.ib(type=bytes, validator=type_validator()) - name = attr.ib(type=Optional[bytes], validator=type_validator()) - email = attr.ib(type=Optional[bytes], validator=type_validator()) + name = attr.ib(type=Optional[bytes], validator=type_validator(), eq=False) + email = attr.ib(type=Optional[bytes], validator=type_validator(), eq=False) @classmethod def from_fullname(cls, fullname: bytes): @@ -835,8 +835,8 @@ class Revision(HashableObjectWithManifest, BaseModel): object_type: Final = "revision" message = attr.ib(type=Optional[bytes], validator=type_validator()) - author = attr.ib(type=Person, validator=type_validator()) - committer = attr.ib(type=Person, validator=type_validator()) + author = attr.ib(type=Optional[Person], validator=type_validator()) + committer = attr.ib(type=Optional[Person], validator=type_validator()) date = attr.ib(type=Optional[TimestampWithTimezone], validator=type_validator()) committer_date = attr.ib( type=Optional[TimestampWithTimezone], validator=type_validator() @@ -877,6 +877,20 @@ class Revision(HashableObjectWithManifest, BaseModel): def _compute_hash_from_attributes(self) -> bytes: return _compute_hash_from_manifest(git_objects.revision_git_object(self)) + @author.validator + def check_author(self, attribute, value): + """If the author is `None`, checks the date is `None` too.""" + if self.author is None and self.date is not None: + raise ValueError("revision date must be None if author is None.") + + @committer.validator + def check_committer(self, attribute, value): + """If the committer is `None`, checks the committer_date is `None` too.""" + if self.committer is None and self.committer_date is not None: + raise ValueError( + "revision committer_date must be None if committer is None." + ) + @classmethod def from_dict(cls, d): d = d.copy() @@ -888,9 +902,17 @@ class Revision(HashableObjectWithManifest, BaseModel): if committer_date: committer_date = TimestampWithTimezone.from_dict(committer_date) + author = d.pop("author") + if author: + author = Person.from_dict(author) + + committer = d.pop("committer") + if committer: + committer = Person.from_dict(committer) + return cls( - author=Person.from_dict(d.pop("author")), - committer=Person.from_dict(d.pop("committer")), + author=author, + committer=committer, date=date, committer_date=committer_date, type=RevisionType(d.pop("type")), @@ -909,7 +931,9 @@ class Revision(HashableObjectWithManifest, BaseModel): Person object. """ return attr.evolve( - self, author=self.author.anonymize(), committer=self.committer.anonymize() + self, + author=None if self.author is None else self.author.anonymize(), + committer=None if self.committer is None else self.committer.anonymize(), ) diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py index e00ce2573e22ef47482b579f7c3586ba86bd6107..03b9ca7729417ca1e0570478592ed96de0379dd1 100644 --- a/swh/model/tests/swh_model_data.py +++ b/swh/model/tests/swh_model_data.py @@ -129,6 +129,27 @@ REVISIONS = [ parents=(), extra_headers=((b"foo", b"bar"),), ), + Revision( + id=hash_to_bytes("51580d63b8dcc0ec73e74994e66896858542840a"), + message=b"hello", + date=DATES[0], + committer=COMMITTERS[0], + author=COMMITTERS[0], + committer_date=DATES[0], + type=RevisionType.GIT, + directory=b"\x01" * 20, + synthetic=False, + metadata=None, + parents=(hash_to_bytes("9b918dd063cec85c2bc63cc7f167e29f5894dcbc"),), + raw_manifest=( + b"commit 207\x00" + b"tree 0101010101010101010101010101010101010101\n" + b"parent 9B918DD063CEC85C2BC63CC7F167E29F5894DCBC" # upper-cased + b"nauthor foo 1234567891 +0200\n" + b"committer foo 1234567891 +0200" + b"\n\nhello" + ), + ), ] EXTIDS = [ @@ -166,6 +187,27 @@ RELEASES = [ message=b"bar", synthetic=False, ), + Release( + id=hash_to_bytes("1cdd1e87234b6f066d0855a3b5b567638a55d583"), + name=b"v0.0.1", + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1234567890, microseconds=0,), + offset_bytes=b"+0200", + ), + author=COMMITTERS[0], + target_type=ObjectType.REVISION, + target=b"\x04" * 20, + message=b"foo", + synthetic=False, + raw_manifest=( + b"tag 102\x00" + b"object 0404040404040404040404040404040404040404\n" + b"type commit\n" + b"tag v0.0.1\n" + b"tagger foo 1234567890 +200" # missing leading 0 for timezone + b"\n\nfoo" + ), + ), ] ORIGINS = [ @@ -281,6 +323,19 @@ DIRECTORIES = [ ), ), ), + Directory( + id=hash_to_bytes("d135a91ac82a754e7f4bdeff8d56ef06d921eb7d"), + entries=( + DirectoryEntry( + name=b"file1.ext", perms=0o644, type="file", target=b"\x11" * 20, + ), + ), + raw_manifest=( + b"tree 34\x00" + + b"00644 file1.ext\x00" # added two leading zeros + + b"\x11" * 20 + ), + ), ] diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py index 6d97e59f09603972371f1fe97dc07174875f6190..2e7fa7746bf1a06c82452a10350e125c1e24c94e 100644 --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -646,6 +646,18 @@ def test_git_author_line_to_author(): assert expected_person == Person.from_fullname(person) +def test_person_comparison(): + """Check only the fullname attribute is used to compare Person objects + + """ + person = Person(fullname=b"p1", name=None, email=None) + assert attr.evolve(person, name=b"toto") == person + assert attr.evolve(person, email=b"toto@example.com") == person + + person = Person(fullname=b"", name=b"toto", email=b"toto@example.com") + assert attr.evolve(person, fullname=b"dude") != person + + # Content @@ -1112,6 +1124,33 @@ def test_revision_extra_headers_as_lists_from_dict(): assert rev_model.extra_headers == extra_headers +def test_revision_no_author_or_committer_from_dict(): + rev_dict = revision_example.copy() + rev_dict["author"] = rev_dict["date"] = None + rev_dict["committer"] = rev_dict["committer_date"] = None + rev_model = Revision.from_dict(rev_dict) + assert rev_model.to_dict() == { + **rev_dict, + "parents": tuple(rev_dict["parents"]), + "extra_headers": (), + "metadata": None, + } + + +def test_revision_none_author_or_committer(): + rev_dict = revision_example.copy() + rev_dict["author"] = None + with pytest.raises(ValueError, match=".*date must be None if author is None.*"): + Revision.from_dict(rev_dict) + + rev_dict = revision_example.copy() + rev_dict["committer"] = None + with pytest.raises( + ValueError, match=".*committer_date must be None if committer is None.*" + ): + Revision.from_dict(rev_dict) + + @given(strategies.objects(split_content=True)) def test_object_type(objtype_and_obj): obj_type, obj = objtype_and_obj