Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • anlambert/swh-model
  • lunar/swh-model
  • franckbret/swh-model
  • douardda/swh-model
  • olasd/swh-model
  • swh/devel/swh-model
  • Alphare/swh-model
  • samplet/swh-model
  • marmoute/swh-model
  • rboyer/swh-model
10 results
Show changes
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from dataclasses import dataclass
from typing import Iterable, List
from swh.model import discovery, model
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
from swh.model.tests.test_identifiers import directory_example
pytest_plugins = ["aiohttp.pytest_plugin"]
UNKNOWN_HASH = hash_to_bytes("17140cb6109f1e3296dc52e2b2cd29bcb40e86be")
KNOWN_CONTENT_HASH = hash_to_bytes("e8e4106de42e2d5d5efab6a9422b9a8677c993c8")
KNOWN_DIRECTORY_HASH = hash_to_bytes("d7ed3d2c31d608823be58b1cbe57605310615231")
KNOWN_DIRECTORY_HASH_2 = hash_to_bytes("c76724e9a0be4b60f4bf0cb48b261df8eda94b1d")
@dataclass
class FakeArchive:
contents: List[model.Content]
skipped_contents: List[model.SkippedContent]
directories: List[model.Directory]
def content_missing(self, contents: List[Sha1Git]) -> Iterable[Sha1Git]:
return []
def skipped_content_missing(
self, skipped_contents: List[Sha1Git]
) -> Iterable[Sha1Git]:
"""List skipped content missing from the archive by sha1"""
return []
def directory_missing(self, directories: List[Sha1Git]) -> Iterable[Sha1Git]:
"""List directories missing from the archive by sha1"""
return []
def test_filter_known_objects(monkeypatch):
# Test with smaller sample sizes to actually trigger the random sampling
monkeypatch.setattr(discovery, "SAMPLE_SIZE", 1)
base_directory = model.Directory.from_dict(directory_example)
# Hardcoding another hash is enough since it's all that's being checked
directory_data = directory_example.copy()
directory_data["id"] = KNOWN_DIRECTORY_HASH_2
other_directory = model.Directory.from_dict(directory_data)
archive = FakeArchive(
contents=[model.Content.from_data(b"blabla")],
skipped_contents=[model.SkippedContent.from_data(b"blabla2", reason="reason")],
directories=[
base_directory,
other_directory,
],
)
assert archive.contents[0].sha1_git == KNOWN_CONTENT_HASH
assert archive.directories[0].id == KNOWN_DIRECTORY_HASH
assert archive.directories[1].id == KNOWN_DIRECTORY_HASH_2
(contents, skipped_contents, directories) = discovery.filter_known_objects(archive)
assert len(contents) == 0
assert len(skipped_contents) == 0
assert len(directories) == 0
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import attr
import pytest
from swh.model.model import ModelObjectType
from swh.model.tests.swh_model_data import TEST_OBJECTS
@pytest.mark.parametrize("object_type, objects", TEST_OBJECTS.items())
def test_swh_model_data(object_type, objects):
"""checks model objects in swh_model_data are in correct shape"""
assert objects
for obj in objects:
assert obj.object_type == object_type
attr.validate(obj)
@pytest.mark.parametrize(
"object_type",
(
ModelObjectType.DIRECTORY,
ModelObjectType.REVISION,
ModelObjectType.RELEASE,
ModelObjectType.SNAPSHOT,
),
)
def test_swh_model_data_hash(object_type):
for obj in TEST_OBJECTS[object_type]:
assert (
obj.compute_hash() == obj.id
), f"{obj.compute_hash().hex()} != {obj.id.hex()}"
def test_ensure_visit_status_date_consistency():
"""ensure origin-visit-status dates are more recent than their visit counterpart
The origin-visit-status dates needs to be shifted slightly in the future from their
visit dates counterpart. Otherwise, we are hitting storage-wise the "on conflict"
ignore policy (because origin-visit-add creates an origin-visit-status with the same
parameters from the origin-visit {origin, visit, date}...
"""
visits = TEST_OBJECTS[ModelObjectType.ORIGIN_VISIT]
visit_statuses = TEST_OBJECTS[ModelObjectType.ORIGIN_VISIT_STATUS]
for visit, visit_status in zip(visits, visit_statuses):
assert visit.origin == visit_status.origin
assert visit.visit == visit_status.visit
assert visit.date < visit_status.date
def test_ensure_visit_status_snapshot_consistency():
"""ensure origin-visit-status snapshots exist in the test dataset"""
snapshots = [snp.id for snp in TEST_OBJECTS[ModelObjectType.SNAPSHOT]]
for visit_status in TEST_OBJECTS[ModelObjectType.ORIGIN_VISIT_STATUS]:
if visit_status.snapshot:
assert visit_status.snapshot in snapshots
This diff is collapsed.
This diff is collapsed.