From cad940dc8c0705cc4ab8419436a7f40069e909d4 Mon Sep 17 00:00:00 2001
From: David Douard <david.douard@sdfa3.org>
Date: Tue, 26 Jan 2021 16:43:13 +0100
Subject: [PATCH] Add swh-journal's model-related test data set in swh-model

so it's kept up to date when the model evolves, thus preventing
swh-journal and swh-model from being unnecessarily coupled.

Related to T2970.
---
 swh/model/tests/swh_model_data.py      | 348 +++++++++++++++++++++++++
 swh/model/tests/test_swh_model_data.py |  35 +++
 2 files changed, 383 insertions(+)
 create mode 100644 swh/model/tests/swh_model_data.py
 create mode 100644 swh/model/tests/test_swh_model_data.py

diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
new file mode 100644
index 00000000..8f497096
--- /dev/null
+++ b/swh/model/tests/swh_model_data.py
@@ -0,0 +1,348 @@
+# Copyright (C) 2019-2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import datetime
+from typing import Dict, Sequence
+
+import attr
+
+from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex
+from swh.model.identifiers import SWHID
+from swh.model.model import (
+    BaseModel,
+    Content,
+    Directory,
+    DirectoryEntry,
+    MetadataAuthority,
+    MetadataAuthorityType,
+    MetadataFetcher,
+    MetadataTargetType,
+    ObjectType,
+    Origin,
+    OriginVisit,
+    OriginVisitStatus,
+    Person,
+    RawExtrinsicMetadata,
+    Release,
+    Revision,
+    RevisionType,
+    SkippedContent,
+    Snapshot,
+    SnapshotBranch,
+    TargetType,
+    Timestamp,
+    TimestampWithTimezone,
+)
+
+UTC = datetime.timezone.utc
+
+CONTENTS = [
+    Content(
+        length=4,
+        data=f"foo{i}".encode(),
+        status="visible",
+        **MultiHash.from_data(f"foo{i}".encode()).digest(),
+    )
+    for i in range(10)
+] + [
+    Content(
+        length=14,
+        data=f"forbidden foo{i}".encode(),
+        status="hidden",
+        **MultiHash.from_data(f"forbidden foo{i}".encode()).digest(),
+    )
+    for i in range(10)
+]
+
+SKIPPED_CONTENTS = [
+    SkippedContent(
+        length=4,
+        status="absent",
+        reason=f"because chr({i}) != '*'",
+        **MultiHash.from_data(f"bar{i}".encode()).digest(),
+    )
+    for i in range(2)
+]
+
+duplicate_content1 = Content(
+    length=4,
+    sha1=hash_to_bytes("44973274ccef6ab4dfaaf86599792fa9c3fe4689"),
+    sha1_git=b"another-foo",
+    blake2s256=b"another-bar",
+    sha256=b"another-baz",
+    status="visible",
+)
+
+# Craft a sha1 collision: same sha1 as duplicate_content1, but a different sha1_git
+sha1_array = bytearray(duplicate_content1.sha1_git)
+sha1_array[0] += 1
+duplicate_content2 = attr.evolve(duplicate_content1, sha1_git=bytes(sha1_array))
+
+
+DUPLICATE_CONTENTS = [duplicate_content1, duplicate_content2]
+
+
+COMMITTERS = [
+    Person(fullname=b"foo", name=b"foo", email=b""),
+    Person(fullname=b"bar", name=b"bar", email=b""),
+]
+
+DATES = [
+    TimestampWithTimezone(
+        timestamp=Timestamp(seconds=1234567891, microseconds=0,),
+        offset=120,
+        negative_utc=False,
+    ),
+    TimestampWithTimezone(
+        timestamp=Timestamp(seconds=1234567892, microseconds=0,),
+        offset=120,
+        negative_utc=False,
+    ),
+]
+
+REVISIONS = [
+    Revision(
+        id=hash_to_bytes("4ca486e65eb68e4986aeef8227d2db1d56ce51b3"),
+        message=b"hello",
+        date=DATES[0],
+        committer=COMMITTERS[0],
+        author=COMMITTERS[0],
+        committer_date=DATES[0],
+        type=RevisionType.GIT,
+        directory=b"\x01" * 20,
+        synthetic=False,
+        metadata=None,
+        parents=(),
+    ),
+    Revision(
+        id=hash_to_bytes("677063f5c405d6fc1781fc56379c9a9adf43d3a0"),
+        message=b"hello again",
+        date=DATES[1],
+        committer=COMMITTERS[1],
+        author=COMMITTERS[1],
+        committer_date=DATES[1],
+        type=RevisionType.MERCURIAL,
+        directory=b"\x02" * 20,
+        synthetic=False,
+        metadata=None,
+        parents=(),
+        extra_headers=((b"foo", b"bar"),),
+    ),
+]
+
+RELEASES = [
+    Release(
+        id=hash_to_bytes("8059dc4e17fcd0e51ca3bcd6b80f4577d281fd08"),
+        name=b"v0.0.1",
+        date=TimestampWithTimezone(
+            timestamp=Timestamp(seconds=1234567890, microseconds=0,),
+            offset=120,
+            negative_utc=False,
+        ),
+        author=COMMITTERS[0],
+        target_type=ObjectType.REVISION,
+        target=b"\x04" * 20,
+        message=b"foo",
+        synthetic=False,
+    ),
+]
+
+ORIGINS = [
+    Origin(url="https://somewhere.org/den/fox",),
+    Origin(url="https://overtherainbow.org/fox/den",),
+]
+
+ORIGIN_VISITS = [
+    OriginVisit(
+        origin=ORIGINS[0].url,
+        date=datetime.datetime(2013, 5, 7, 4, 20, 39, 369271, tzinfo=UTC),
+        visit=1,
+        type="git",
+    ),
+    OriginVisit(
+        origin=ORIGINS[1].url,
+        date=datetime.datetime(2014, 11, 27, 17, 20, 39, tzinfo=UTC),
+        visit=1,
+        type="hg",
+    ),
+    OriginVisit(
+        origin=ORIGINS[0].url,
+        date=datetime.datetime(2018, 11, 27, 17, 20, 39, tzinfo=UTC),
+        visit=2,
+        type="git",
+    ),
+    OriginVisit(
+        origin=ORIGINS[0].url,
+        date=datetime.datetime(2018, 11, 27, 17, 20, 39, tzinfo=UTC),
+        visit=3,
+        type="git",
+    ),
+    OriginVisit(
+        origin=ORIGINS[1].url,
+        date=datetime.datetime(2015, 11, 27, 17, 20, 39, tzinfo=UTC),
+        visit=2,
+        type="hg",
+    ),
+]
+
+# The origin-visit-status dates need to be shifted slightly into the future relative
+# to their visit date counterparts. Otherwise, storage-wise, we hit the "on conflict"
+# ignore policy (because origin-visit-add creates an origin-visit-status with the same
+# parameters as the origin-visit: {origin, visit, date}).
+ORIGIN_VISIT_STATUSES = [
+    OriginVisitStatus(
+        origin=ORIGINS[0].url,
+        date=datetime.datetime(2013, 5, 7, 4, 20, 39, 432222, tzinfo=UTC),
+        visit=1,
+        type="git",
+        status="ongoing",
+        snapshot=None,
+        metadata=None,
+    ),
+    OriginVisitStatus(
+        origin=ORIGINS[1].url,
+        date=datetime.datetime(2014, 11, 27, 17, 21, 12, tzinfo=UTC),
+        visit=1,
+        type="hg",
+        status="ongoing",
+        snapshot=None,
+        metadata=None,
+    ),
+    OriginVisitStatus(
+        origin=ORIGINS[0].url,
+        date=datetime.datetime(2018, 11, 27, 17, 20, 59, tzinfo=UTC),
+        visit=2,
+        type="git",
+        status="ongoing",
+        snapshot=None,
+        metadata=None,
+    ),
+    OriginVisitStatus(
+        origin=ORIGINS[0].url,
+        date=datetime.datetime(2018, 11, 27, 17, 20, 49, tzinfo=UTC),
+        visit=3,
+        type="git",
+        status="full",
+        snapshot=hash_to_bytes("17d0066a4a80aba4a0e913532ee8ff2014f006a9"),
+        metadata=None,
+    ),
+    OriginVisitStatus(
+        origin=ORIGINS[1].url,
+        date=datetime.datetime(2015, 11, 27, 17, 22, 18, tzinfo=UTC),
+        visit=2,
+        type="hg",
+        status="partial",
+        snapshot=hash_to_bytes("8ce268b87faf03850693673c3eb5c9bb66e1ca38"),
+        metadata=None,
+    ),
+]
+
+
+DIRECTORIES = [
+    Directory(id=hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), entries=()),
+    Directory(
+        id=hash_to_bytes("21416d920e0ebf0df4a7888bed432873ed5cb3a7"),
+        entries=(
+            DirectoryEntry(
+                name=b"file1.ext",
+                perms=0o644,
+                type="file",
+                target=CONTENTS[0].sha1_git,
+            ),
+            DirectoryEntry(
+                name=b"dir1",
+                perms=0o755,
+                type="dir",
+                target=hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
+            ),
+            DirectoryEntry(
+                name=b"subprepo1", perms=0o160000, type="rev", target=REVISIONS[1].id,
+            ),
+        ),
+    ),
+]
+
+
+SNAPSHOTS = [
+    Snapshot(
+        id=hash_to_bytes("17d0066a4a80aba4a0e913532ee8ff2014f006a9"),
+        branches={
+            b"master": SnapshotBranch(
+                target_type=TargetType.REVISION, target=REVISIONS[0].id
+            )
+        },
+    ),
+    Snapshot(
+        id=hash_to_bytes("8ce268b87faf03850693673c3eb5c9bb66e1ca38"),
+        branches={
+            b"target/revision": SnapshotBranch(
+                target_type=TargetType.REVISION, target=REVISIONS[0].id,
+            ),
+            b"target/alias": SnapshotBranch(
+                target_type=TargetType.ALIAS, target=b"target/revision"
+            ),
+            b"target/directory": SnapshotBranch(
+                target_type=TargetType.DIRECTORY, target=DIRECTORIES[0].id,
+            ),
+            b"target/release": SnapshotBranch(
+                target_type=TargetType.RELEASE, target=RELEASES[0].id
+            ),
+            b"target/snapshot": SnapshotBranch(
+                target_type=TargetType.SNAPSHOT,
+                target=hash_to_bytes("17d0066a4a80aba4a0e913532ee8ff2014f006a9"),
+            ),
+        },
+    ),
+]
+
+
+METADATA_AUTHORITIES = [
+    MetadataAuthority(
+        type=MetadataAuthorityType.FORGE, url="http://example.org/", metadata={},
+    ),
+]
+
+METADATA_FETCHERS = [
+    MetadataFetcher(name="test-fetcher", version="1.0.0", metadata={},)
+]
+
+RAW_EXTRINSIC_METADATA = [
+    RawExtrinsicMetadata(
+        type=MetadataTargetType.ORIGIN,
+        target="http://example.org/foo.git",
+        discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
+        authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
+        fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None),
+        format="json",
+        metadata=b'{"foo": "bar"}',
+    ),
+    RawExtrinsicMetadata(
+        type=MetadataTargetType.CONTENT,
+        target=SWHID(
+            object_type="content", object_id=hash_to_hex(CONTENTS[0].sha1_git)
+        ),
+        discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
+        authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
+        fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None),
+        format="json",
+        metadata=b'{"foo": "bar"}',
+    ),
+]
+
+
+TEST_OBJECTS: Dict[str, Sequence[BaseModel]] = {
+    "content": CONTENTS,
+    "directory": DIRECTORIES,
+    "metadata_authority": METADATA_AUTHORITIES,
+    "metadata_fetcher": METADATA_FETCHERS,
+    "origin": ORIGINS,
+    "origin_visit": ORIGIN_VISITS,
+    "origin_visit_status": ORIGIN_VISIT_STATUSES,
+    "raw_extrinsic_metadata": RAW_EXTRINSIC_METADATA,
+    "release": RELEASES,
+    "revision": REVISIONS,
+    "snapshot": SNAPSHOTS,
+    "skipped_content": SKIPPED_CONTENTS,
+}
diff --git a/swh/model/tests/test_swh_model_data.py b/swh/model/tests/test_swh_model_data.py
new file mode 100644
index 00000000..7b50e60e
--- /dev/null
+++ b/swh/model/tests/test_swh_model_data.py
@@ -0,0 +1,35 @@
+# Copyright (C) 2021  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import attr
+import pytest
+
+from swh.model.tests.swh_model_data import TEST_OBJECTS
+
+
+@pytest.mark.parametrize("object_type, objects", TEST_OBJECTS.items())
+def test_swh_model_data(object_type, objects):
+    """checks model objects in swh_model_data are in correct shape"""
+    assert objects
+    for obj in objects:
+        assert obj.object_type == object_type
+        attr.validate(obj)
+
+
+def test_ensure_visit_visit_status_date_consistency():
+    """ensure origin-visit-status dates are more recent than their visit counterpart
+
+    The origin-visit-status dates need to be shifted slightly into the future relative
+    to their visit date counterparts. Otherwise, storage-wise, we hit the "on conflict"
+    ignore policy (because origin-visit-add creates an origin-visit-status with the same
+    parameters as the origin-visit: {origin, visit, date}).
+
+    """
+    visits = TEST_OBJECTS["origin_visit"]
+    visit_statuses = TEST_OBJECTS["origin_visit_status"]
+    for visit, visit_status in zip(visits, visit_statuses):
+        assert visit.origin == visit_status.origin
+        assert visit.visit == visit_status.visit
+        assert visit.date < visit_status.date
-- 
GitLab