From dbce3cc6da81bee34729ef73a5d21c7b177e7cb1 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Mon, 21 Mar 2022 15:41:03 +0100
Subject: [PATCH] Add support for None as author or committer of a Revision

committer=None happens on some malformed commits generated by old dgit
versions ; and it is possible for author=None to happen for the same reason.
---
 swh/model/git_objects.py           | 13 +++++---
 swh/model/hypothesis_strategies.py | 50 ++++++++++++++++++++++--------
 swh/model/model.py                 | 34 +++++++++++++++++---
 swh/model/tests/test_model.py      | 27 ++++++++++++++++
 4 files changed, 102 insertions(+), 22 deletions(-)

diff --git a/swh/model/git_objects.py b/swh/model/git_objects.py
index 55c7b1ee..7b36da64 100644
--- a/swh/model/git_objects.py
+++ b/swh/model/git_objects.py
@@ -340,10 +340,15 @@ def revision_git_object(revision: Union[Dict, model.Revision]) -> bytes:
         if parent:
             headers.append((b"parent", hash_to_bytehex(parent)))
 
-    headers.append((b"author", format_author_data(revision.author, revision.date)))
-    headers.append(
-        (b"committer", format_author_data(revision.committer, revision.committer_date),)
-    )
+    if revision.author is not None:
+        headers.append((b"author", format_author_data(revision.author, revision.date)))
+    if revision.committer is not None:
+        headers.append(
+            (
+                b"committer",
+                format_author_data(revision.committer, revision.committer_date),
+            )
+        )
 
     # Handle extra headers
     metadata = revision.metadata or ImmutableDict()
diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py
index f44e5de5..54f552cd 100644
--- a/swh/model/hypothesis_strategies.py
+++ b/swh/model/hypothesis_strategies.py
@@ -217,6 +217,7 @@ def releases_d(draw):
 
     d = draw(
         one_of(
+            # None author/date:
             builds(
                 dict,
                 name=name,
@@ -228,6 +229,7 @@ def releases_d(draw):
                 target_type=target_type,
                 metadata=metadata,
             ),
+            # non-None author/date:
             builds(
                 dict,
                 name=name,
@@ -239,6 +241,8 @@ def releases_d(draw):
                 target_type=target_type,
                 metadata=metadata,
             ),
+            # it is also possible for date to be None but not author, but let's not
+            # overwhelm hypothesis with this edge case
         )
     )
 
@@ -264,19 +268,39 @@ def extra_headers():
 @composite
 def revisions_d(draw):
     d = draw(
-        builds(
-            dict,
-            message=optional(binary()),
-            synthetic=booleans(),
-            author=persons_d(),
-            committer=persons_d(),
-            date=timestamps_with_timezone_d(),
-            committer_date=timestamps_with_timezone_d(),
-            parents=tuples(sha1_git()),
-            directory=sha1_git(),
-            type=sampled_from([x.value for x in RevisionType]),
-            metadata=optional(revision_metadata()),
-            extra_headers=extra_headers(),
+        one_of(
+            # None author/committer/date/committer_date
+            builds(
+                dict,
+                message=optional(binary()),
+                synthetic=booleans(),
+                author=none(),
+                committer=none(),
+                date=none(),
+                committer_date=none(),
+                parents=tuples(sha1_git()),
+                directory=sha1_git(),
+                type=sampled_from([x.value for x in RevisionType]),
+                metadata=optional(revision_metadata()),
+                extra_headers=extra_headers(),
+            ),
+            # non-None author/committer/date/committer_date
+            builds(
+                dict,
+                message=optional(binary()),
+                synthetic=booleans(),
+                author=persons_d(),
+                committer=persons_d(),
+                date=timestamps_with_timezone_d(),
+                committer_date=timestamps_with_timezone_d(),
+                parents=tuples(sha1_git()),
+                directory=sha1_git(),
+                type=sampled_from([x.value for x in RevisionType]),
+                metadata=optional(revision_metadata()),
+                extra_headers=extra_headers(),
+            ),
+            # There are many other combinations, but let's not overwhelm hypothesis
+            # with these edge cases
         )
     )
     # TODO: metadata['extra_headers'] can have binary keys and values
diff --git a/swh/model/model.py b/swh/model/model.py
index 1e5f555e..3eca6744 100644
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -835,8 +835,8 @@ class Revision(HashableObjectWithManifest, BaseModel):
     object_type: Final = "revision"
 
     message = attr.ib(type=Optional[bytes], validator=type_validator())
-    author = attr.ib(type=Person, validator=type_validator())
-    committer = attr.ib(type=Person, validator=type_validator())
+    author = attr.ib(type=Optional[Person], validator=type_validator())
+    committer = attr.ib(type=Optional[Person], validator=type_validator())
     date = attr.ib(type=Optional[TimestampWithTimezone], validator=type_validator())
     committer_date = attr.ib(
         type=Optional[TimestampWithTimezone], validator=type_validator()
@@ -877,6 +877,20 @@ class Revision(HashableObjectWithManifest, BaseModel):
     def _compute_hash_from_attributes(self) -> bytes:
         return _compute_hash_from_manifest(git_objects.revision_git_object(self))
 
+    @author.validator
+    def check_author(self, attribute, value):
+        """If the author is `None`, checks the date is `None` too."""
+        if self.author is None and self.date is not None:
+            raise ValueError("revision date must be None if author is None.")
+
+    @committer.validator
+    def check_committer(self, attribute, value):
+        """If the committer is `None`, checks the committer_date is `None` too."""
+        if self.committer is None and self.committer_date is not None:
+            raise ValueError(
+                "revision committer_date must be None if committer is None."
+            )
+
     @classmethod
     def from_dict(cls, d):
         d = d.copy()
@@ -888,9 +902,17 @@ class Revision(HashableObjectWithManifest, BaseModel):
         if committer_date:
             committer_date = TimestampWithTimezone.from_dict(committer_date)
 
+        author = d.pop("author")
+        if author:
+            author = Person.from_dict(author)
+
+        committer = d.pop("committer")
+        if committer:
+            committer = Person.from_dict(committer)
+
         return cls(
-            author=Person.from_dict(d.pop("author")),
-            committer=Person.from_dict(d.pop("committer")),
+            author=author,
+            committer=committer,
             date=date,
             committer_date=committer_date,
             type=RevisionType(d.pop("type")),
@@ -909,7 +931,9 @@ class Revision(HashableObjectWithManifest, BaseModel):
         Person object.
         """
         return attr.evolve(
-            self, author=self.author.anonymize(), committer=self.committer.anonymize()
+            self,
+            author=None if self.author is None else self.author.anonymize(),
+            committer=None if self.committer is None else self.committer.anonymize(),
         )
 
 
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
index 0efbcfe4..2e7fa774 100644
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -1124,6 +1124,33 @@ def test_revision_extra_headers_as_lists_from_dict():
     assert rev_model.extra_headers == extra_headers
 
 
+def test_revision_no_author_or_committer_from_dict():
+    rev_dict = revision_example.copy()
+    rev_dict["author"] = rev_dict["date"] = None
+    rev_dict["committer"] = rev_dict["committer_date"] = None
+    rev_model = Revision.from_dict(rev_dict)
+    assert rev_model.to_dict() == {
+        **rev_dict,
+        "parents": tuple(rev_dict["parents"]),
+        "extra_headers": (),
+        "metadata": None,
+    }
+
+
+def test_revision_none_author_or_committer():
+    rev_dict = revision_example.copy()
+    rev_dict["author"] = None
+    with pytest.raises(ValueError, match=".*date must be None if author is None.*"):
+        Revision.from_dict(rev_dict)
+
+    rev_dict = revision_example.copy()
+    rev_dict["committer"] = None
+    with pytest.raises(
+        ValueError, match=".*committer_date must be None if committer is None.*"
+    ):
+        Revision.from_dict(rev_dict)
+
+
 @given(strategies.objects(split_content=True))
 def test_object_type(objtype_and_obj):
     obj_type, obj = objtype_and_obj
-- 
GitLab