Skip to content
Snippets Groups Projects
Commit ded150d6 authored by Nicolas Dandrimont
Browse files

Add a method to generate Content/SkippedContent from binary data

This lets us generate Content objects directly from a bytestring, with the
proper set of hashes auto-generated from the contents.
parent cb075eb3
No related branches found
No related tags found
No related merge requests found
......@@ -17,7 +17,7 @@ from .identifiers import (
normalize_timestamp, directory_identifier, revision_identifier,
release_identifier, snapshot_identifier
)
from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes
from .hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, MultiHash
class MissingData(Exception):
......@@ -390,6 +390,15 @@ class BaseContent(BaseModel):
type=str,
validator=attr.validators.in_(['visible', 'hidden', 'absent']))
@staticmethod
def _hash_data(data: bytes):
    """Compute the hash-derived fields of a content object from raw bytes.

    The result is the mapping returned by
    ``MultiHash.from_data(data).digest()``, extended in place with two
    extra entries: ``'data'`` (the bytes themselves) and ``'length'``
    (``len(data)``).
    """
    fields = MultiHash.from_data(data).digest()
    # Extend the digest mapping itself rather than copying it.
    fields.update(data=data, length=len(data))
    return fields
def to_dict(self):
content = super().to_dict()
if content['ctime'] is None:
......@@ -448,6 +457,17 @@ class Content(BaseContent):
del content['data']
return content
@classmethod
def from_data(cls, data, status='visible') -> 'Content':
    """Build a Content directly from the byte string `data`.

    The hashes and the length are computed from `data` through
    `_hash_data`, and the data itself is stored on the resulting object.
    `status` is forwarded unchanged to the constructor.
    """
    # Keyword arguments override same-named keys of the mapping, which
    # mirrors assigning ``d['status']`` after hashing.
    fields = dict(cls._hash_data(data), status=status)
    return cls(**fields)
@classmethod
def from_dict(cls, d):
    """Build an instance from the dict `d`.

    Delegates to the parent class' `from_dict`, passing
    ``use_subclass=False``.
    """
    instance = super().from_dict(d, use_subclass=False)
    return instance
......@@ -503,6 +523,22 @@ class SkippedContent(BaseContent):
del content['origin']
return content
@classmethod
def from_data(cls, data, reason: str) -> 'SkippedContent':
    """Build a SkippedContent from the byte string `data`.

    The hashes and the length are computed from `data` through
    `_hash_data`; the data itself is not kept. The status is always
    ``'absent'`` and `reason` is stored as given.

    You can use `attr.evolve` on such a generated content to nullify some
    of its attributes, e.g. for tests.
    """
    fields = cls._hash_data(data)
    # The raw bytes must not be passed on; like ``del``, ``pop`` without
    # a default raises KeyError if the key is unexpectedly missing.
    fields.pop('data')
    fields.update(status='absent', reason=reason)
    return cls(**fields)
@classmethod
def from_dict(cls, d):
d2 = d
......
......@@ -7,14 +7,15 @@ import copy
import datetime
from hypothesis import given
from hypothesis.strategies import binary
import pytest
from swh.model.model import (
Content, Directory, Revision, Release, Snapshot,
Content, SkippedContent, Directory, Revision, Release, Snapshot,
Timestamp, TimestampWithTimezone,
MissingData,
)
from swh.model.hashutil import hash_to_bytes
from swh.model.hashutil import hash_to_bytes, MultiHash
from swh.model.hypothesis_strategies import objects, origins, origin_visits
from swh.model.identifiers import (
directory_identifier, revision_identifier, release_identifier,
......@@ -137,6 +138,36 @@ def test_content_data_missing():
c.with_data()
@given(binary(max_size=4096))
def test_content_from_data(data):
    """Content.from_data keeps the data and fills in length, status, hashes."""
    content = Content.from_data(data)
    expected_hashes = MultiHash.from_data(data).digest()

    assert content.data == data
    assert content.length == len(data)
    assert content.status == 'visible'
    # Every digest computed independently must match the same-named
    # attribute on the generated object.
    for algo, digest in expected_hashes.items():
        assert getattr(content, algo) == digest
@given(binary(max_size=4096))
def test_hidden_content_from_data(data):
    """Content.from_data honours an explicit ``status='hidden'``."""
    content = Content.from_data(data, status='hidden')
    expected_hashes = MultiHash.from_data(data).digest()

    assert content.data == data
    assert content.length == len(data)
    assert content.status == 'hidden'
    # Every digest computed independently must match the same-named
    # attribute on the generated object.
    for algo, digest in expected_hashes.items():
        assert getattr(content, algo) == digest
@given(binary(max_size=4096))
def test_skipped_content_from_data(data):
    """SkippedContent.from_data sets reason, length, status and hashes."""
    skipped = SkippedContent.from_data(data, reason='reason')
    expected_hashes = MultiHash.from_data(data).digest()

    assert skipped.reason == 'reason'
    assert skipped.length == len(data)
    assert skipped.status == 'absent'
    # Every digest computed independently must match the same-named
    # attribute on the generated object.
    for algo, digest in expected_hashes.items():
        assert getattr(skipped, algo) == digest
def test_directory_model_id_computation():
dir_dict = dict(directory_example)
del dir_dict['id']
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment