Add a method to generate Content/SkippedContent from binary data

This lets us generate Content and SkippedContent objects directly from a byte string, with the proper set of hashes and the length auto-generated from the contents.

Add a method to generate Content/SkippedContent from binary data
This lets us generate Content and SkippedContent objects directly from a byte string, with the proper set of hashes and the length auto-generated from the contents.
ded150d6 · Nicolas Dandrimont · cb075eb3 · ded150d6 · ded150d6
Commit ded150d6 authored 5 years ago by Nicolas Dandrimont
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -17,7 +17,7 @@ from .identifiers import (
    normalize_timestamp, directory_identifier, revision_identifier,
    release_identifier, snapshot_identifier
 )
-from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes
+from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash


 class MissingData(Exception):
@@ -390,6 +390,15 @@ class BaseContent(BaseModel):
        type=str,
        validator=attr.validators.in_(['visible', 'hidden', 'absent']))

+    @staticmethod
+    def _hash_data(data: bytes):
+        """Hash `data`; return its digests plus 'data' and 'length' keys."""
+        d = MultiHash.from_data(data).digest()  # used as constructor kwargs
+        d['data'] = data  # SkippedContent.from_data deletes this key again
+        d['length'] = len(data)
+
+        return d
+
    def to_dict(self):
        content = super().to_dict()
        if content['ctime'] is None:
@@ -448,6 +457,17 @@ class Content(BaseContent):
            del content['data']
        return content

+    @classmethod
+    def from_data(cls, data, status='visible') -> 'Content':
+        """Build a Content from the byte string `data`.
+
+        The hashes and length are computed from `data`, which is stored on
+        the object too; `status` is stored as given.
+        """
+        d = cls._hash_data(data)  # hashes, 'data' and 'length'
+        d['status'] = status
+        return cls(**d)
+
    @classmethod
    def from_dict(cls, d):
        return super().from_dict(d, use_subclass=False)
@@ -503,6 +523,22 @@ class SkippedContent(BaseContent):
            del content['origin']
        return content

+    @classmethod
+    def from_data(cls, data, reason: str) -> 'SkippedContent':
+        """Build a SkippedContent from the byte string `data`.
+
+        Hashes and length are computed from `data`; the data itself is
+        dropped, the status is always 'absent' and `reason` is stored as is.
+
+        Use `attr.evolve` on the result to nullify some of its attributes,
+        e.g. for tests.
+        """
+        d = cls._hash_data(data)
+        del d['data']  # only the hashes and length are passed on
+        d['status'] = 'absent'  # fixed here, not chosen by the caller
+        d['reason'] = reason
+        return cls(**d)
+
    @classmethod
    def from_dict(cls, d):
        d2 = d

--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -7,14 +7,15 @@ import copy
 import datetime

 from hypothesis import given
+from hypothesis.strategies import binary
 import pytest

 from swh.model.model import (
-    Content, Directory, Revision, Release, Snapshot,
+    Content, SkippedContent, Directory, Revision, Release, Snapshot,
    Timestamp, TimestampWithTimezone,
    MissingData,
 )
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, MultiHash
 from swh.model.hypothesis_strategies import objects, origins, origin_visits
 from swh.model.identifiers import (
    directory_identifier, revision_identifier, release_identifier,
@@ -137,6 +138,36 @@ def test_content_data_missing():
        c.with_data()


+@given(binary(max_size=4096))  # byte strings of at most 4096 bytes
+def test_content_from_data(data):
+    c = Content.from_data(data)  # status left at its default
+    assert c.data == data
+    assert c.length == len(data)
+    assert c.status == 'visible'
+    for key, value in MultiHash.from_data(data).digest().items():
+        assert getattr(c, key) == value  # each digest is an attribute
+
+
+@given(binary(max_size=4096))  # byte strings of at most 4096 bytes
+def test_hidden_content_from_data(data):
+    c = Content.from_data(data, status='hidden')  # explicit status
+    assert c.data == data
+    assert c.length == len(data)
+    assert c.status == 'hidden'
+    for key, value in MultiHash.from_data(data).digest().items():
+        assert getattr(c, key) == value  # each digest is an attribute
+
+
+@given(binary(max_size=4096))  # byte strings of at most 4096 bytes
+def test_skipped_content_from_data(data):
+    c = SkippedContent.from_data(data, reason='reason')
+    assert c.reason == 'reason'
+    assert c.length == len(data)
+    assert c.status == 'absent'  # set by from_data, not by the caller
+    for key, value in MultiHash.from_data(data).digest().items():
+        assert getattr(c, key) == value  # each digest is an attribute
+
+
 def test_directory_model_id_computation():
    dir_dict = dict(directory_example)
    del dir_dict['id']