From ded150d6c32e482086b8d35c509cc6a2f3166b63 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Mon, 2 Mar 2020 10:35:05 +0100
Subject: [PATCH] Add a method to generate Content/SkippedContent from binary
 data

This lets us generate Content and SkippedContent objects directly from a byte
string, with the proper set of hashes and the length computed from the data.
---
 swh/model/model.py            | 38 ++++++++++++++++++++++++++++++++++-
 swh/model/tests/test_model.py | 35 ++++++++++++++++++++++++++++++--
 2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/swh/model/model.py b/swh/model/model.py
index aff5a7d6..4f0810f2 100644
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -17,7 +17,7 @@ from .identifiers import (
     normalize_timestamp, directory_identifier, revision_identifier,
     release_identifier, snapshot_identifier
 )
-from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes
+from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash
 
 
 class MissingData(Exception):
@@ -390,6 +390,15 @@ class BaseContent(BaseModel):
         type=str,
         validator=attr.validators.in_(['visible', 'hidden', 'absent']))
 
+    @staticmethod
+    def _hash_data(data: bytes):
+        """Hash `data`; return its digests plus `data` and `length` keys"""
+        d = MultiHash.from_data(data).digest()
+        d['data'] = data  # raw bytes; SkippedContent.from_data drops this
+        d['length'] = len(data)
+
+        return d
+
     def to_dict(self):
         content = super().to_dict()
         if content['ctime'] is None:
@@ -448,6 +457,17 @@ class Content(BaseContent):
             del content['data']
         return content
 
+    @classmethod
+    def from_data(cls, data: bytes, status: str = 'visible') -> 'Content':
+        """Generate a Content from a given `data` byte string.
+
+        This populates the Content with the hashes and length for `data`,
+        the data itself, and the given `status` ('visible' by default).
+        """
+        d = cls._hash_data(data)
+        d['status'] = status
+        return cls(**d)
+
     @classmethod
     def from_dict(cls, d):
         return super().from_dict(d, use_subclass=False)
@@ -503,6 +523,22 @@ class SkippedContent(BaseContent):
             del content['origin']
         return content
 
+    @classmethod
+    def from_data(cls, data: bytes, reason: str) -> 'SkippedContent':
+        """Generate a SkippedContent from a given `data` byte string.
+
+        This populates the SkippedContent with the hashes and length for the
+        data; the data itself is dropped and the status is set to 'absent'.
+
+        You can use `attr.evolve` on such a generated content to nullify some
+        of its attributes, e.g. for tests.
+        """
+        d = cls._hash_data(data)
+        del d['data']  # not passed to the SkippedContent constructor
+        d['status'] = 'absent'
+        d['reason'] = reason
+        return cls(**d)
+
     @classmethod
     def from_dict(cls, d):
         d2 = d
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
index a97c3926..be3219de 100644
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -7,14 +7,15 @@ import copy
 import datetime
 
 from hypothesis import given
+from hypothesis.strategies import binary
 import pytest
 
 from swh.model.model import (
-    Content, Directory, Revision, Release, Snapshot,
+    Content, SkippedContent, Directory, Revision, Release, Snapshot,
     Timestamp, TimestampWithTimezone,
     MissingData,
 )
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, MultiHash
 from swh.model.hypothesis_strategies import objects, origins, origin_visits
 from swh.model.identifiers import (
     directory_identifier, revision_identifier, release_identifier,
@@ -137,6 +138,36 @@ def test_content_data_missing():
         c.with_data()
 
 
+@given(binary(max_size=4096))  # arbitrary byte strings, at most 4096 bytes
+def test_content_from_data(data):
+    c = Content.from_data(data)
+    assert c.data == data
+    assert c.length == len(data)
+    assert c.status == 'visible'  # default status
+    for key, value in MultiHash.from_data(data).digest().items():
+        assert getattr(c, key) == value  # one attribute per hash
+
+
+@given(binary(max_size=4096))  # arbitrary byte strings, at most 4096 bytes
+def test_hidden_content_from_data(data):
+    c = Content.from_data(data, status='hidden')
+    assert c.data == data
+    assert c.length == len(data)
+    assert c.status == 'hidden'  # explicit status is passed through
+    for key, value in MultiHash.from_data(data).digest().items():
+        assert getattr(c, key) == value  # one attribute per hash
+
+
+@given(binary(max_size=4096))  # arbitrary byte strings, at most 4096 bytes
+def test_skipped_content_from_data(data):
+    c = SkippedContent.from_data(data, reason='reason')
+    assert c.reason == 'reason'
+    assert c.length == len(data)
+    assert c.status == 'absent'  # always set by SkippedContent.from_data
+    for key, value in MultiHash.from_data(data).digest().items():
+        assert getattr(c, key) == value  # one attribute per hash
+
+
 def test_directory_model_id_computation():
     dir_dict = dict(directory_example)
     del dir_dict['id']
-- 
GitLab