From 7d5dd13175324cb18a3b24c726f1700c3b06d6e0 Mon Sep 17 00:00:00 2001
From: Aymeric Varasse <varasterix@softwareheritage.org>
Date: Wed, 26 Mar 2025 18:17:08 +0100
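Subject: [PATCH] Fix (some) tests and mistakes

Make the exporters and their tests handle swh.model objects rather than
plain dicts:

* edges.py: compare branch.target_type.value against "alias" and look up
  ExtendedObjectType from target_type.value.upper().
* orc.py: export revision.type.value instead of revision.type.name, and
  pass content.to_dict() to objstorage.get().
* utils.py: remove_pull_requests() reads branch.target_type / branch.target
  as attributes instead of dict keys.
* test_edges.py: convert the test messages with _turn_message_into_objects()
  before handing them to the exporter; fix the nesting of the TEST_RELEASE
  author and use a datetime for the visit status date.
* test_journal_processor.py: build the sort key with getattr() instead of
  dict.get().
* test_orc.py: pass model objects to process_object() and
  TimestampWithTimezone instances to swh_date_to_tuple().
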
---
 swh/export/exporters/edges.py             |  4 +-
 swh/export/exporters/orc.py               |  4 +-
 swh/export/test/test_edges.py             | 16 +++++--
 swh/export/test/test_journal_processor.py |  2 +-
 swh/export/test/test_orc.py               | 57 ++++++++++++-----------
 swh/export/utils.py                       |  4 +-
 6 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/swh/export/exporters/edges.py b/swh/export/exporters/edges.py
index d823108..0f9c3ba 100644
--- a/swh/export/exporters/edges.py
+++ b/swh/export/exporters/edges.py
@@ -107,14 +107,14 @@ class GraphEdgesExporter(ExporterDispatch):
         self.write_node((ExtendedObjectType.SNAPSHOT, snapshot.id))
         for branch_name, branch in snapshot.branches.items():
             original_branch_name = branch_name
-            while branch and branch.target_type == "alias":
+            while branch and branch.target_type.value == "alias":
                 branch_name = branch.target
                 branch = snapshot.branches.get(branch_name)
             if branch is None or not branch_name:
                 continue
             self.write_edge(
                 (ExtendedObjectType.SNAPSHOT, snapshot.id),
-                (ExtendedObjectType[branch.target_type.name], branch.target),
+                (ExtendedObjectType[branch.target_type.value.upper()], branch.target),
                 labels=[
                     base64.b64encode(original_branch_name).decode(),
                 ],
diff --git a/swh/export/exporters/orc.py b/swh/export/exporters/orc.py
index 342cd75..ad153c1 100644
--- a/swh/export/exporters/orc.py
+++ b/swh/export/exporters/orc.py
@@ -332,7 +332,7 @@ class ORCExporter(ExporterDispatch):
                 ),
                 *swh_date_to_tuple(revision.committer_date),
                 hash_to_hex_or_none(revision.directory),
-                revision.type.name,
+                revision.type.value,
                 revision.raw_manifest,
             )
         )
@@ -386,7 +386,7 @@ class ORCExporter(ExporterDispatch):
         data = None
         if self.with_data:
             try:
-                data = self.objstorage.get(content)
+                data = self.objstorage.get(content.to_dict())
             except ObjNotFoundError:
                 # WARNING: I'm not sure this is right
                 logger.warning(
diff --git a/swh/export/test/test_edges.py b/swh/export/test/test_edges.py
index 4512415..be00a57 100644
--- a/swh/export/test/test_edges.py
+++ b/swh/export/test/test_edges.py
@@ -5,6 +5,7 @@
 
 from base64 import b64encode
 import collections
+from datetime import datetime
 import hashlib
 from typing import Tuple
 from unittest.mock import Mock, call
@@ -12,6 +13,7 @@ from unittest.mock import Mock, call
 import pytest
 
 from swh.export.exporters.edges import GraphEdgesExporter, sort_graph_nodes
+from swh.export.journalprocessor import _turn_message_into_objects
 from swh.export.utils import ZSTFile
 from swh.model.hashutil import MultiHash, hash_to_bytes
 from swh.model.model import ModelObjectType
@@ -50,7 +52,7 @@ TEST_RELEASE = {
         "offset": 120,
         "negative_utc": False,
     },
-    "author": {"author": {"fullname": b"foo", "name": b"foo", "email": b""}},
+    "author": {"fullname": b"foo", "name": b"foo", "email": b""},
     "target_type": "revision",
     "target": b"\x04" * 20,
     "message": b"foo",
@@ -63,7 +65,7 @@ TEST_ORIGIN_2 = {"url": "https://somewhere.org/den/fox/2"}
 TEST_ORIGIN_VISIT_STATUS = {
     "origin": TEST_ORIGIN["url"],
     "visit": 1,
-    "date": "2013-05-07 04:20:39.369271+00:00",
+    "date": datetime.fromisoformat("2013-05-07 04:20:39.369271+00:00"),
     "snapshot": None,  # TODO
     "status": "ongoing",  # TODO
     "metadata": {"foo": "bar"},
@@ -87,7 +89,9 @@ class FakeDiskSet(set):
 
 @pytest.fixture
 def exporter():
-    def wrapped(messages, config=None) -> Tuple[Mock, Mock]:
+    def wrapped(
+        messages: dict[ModelObjectType, list[dict]], config=None
+    ) -> Tuple[Mock, Mock]:
         if config is None:
             config = {}
         exporter = GraphEdgesExporter(config, "/dummy_path", "/dummy_sensitive_path")
@@ -97,8 +101,10 @@ def exporter():
             node_writer,
             edge_writer,
         )
-        for object_type, objects in messages.items():
-            for obj in objects:
+        for object_type, message_list in messages.items():
+            for message in message_list:
+                obj = _turn_message_into_objects(object_type.value, (b"", message))[1]
+                assert obj is not None
                 exporter.process_object(object_type, obj)
         return node_writer.write, edge_writer.write
 
diff --git a/swh/export/test/test_journal_processor.py b/swh/export/test/test_journal_processor.py
index 9a5279e..8abe38b 100644
--- a/swh/export/test/test_journal_processor.py
+++ b/swh/export/test/test_journal_processor.py
@@ -74,7 +74,7 @@ def assert_exported_objects(
     def key(obj):
         """bare minimum to get a deterministic order"""
         return (obj[0],) + tuple(
-            obj[1].get(k) for k in ("id", "url", "origin", "visit", "date")
+            getattr(obj[1], k, None) for k in ("id", "url", "origin", "visit", "date")
         )
 
     assert sorted(exported_objects, key=key) == sorted(
diff --git a/swh/export/test/test_orc.py b/swh/export/test/test_orc.py
index 785bc3c..211de6b 100644
--- a/swh/export/test/test_orc.py
+++ b/swh/export/test/test_orc.py
@@ -26,6 +26,7 @@ from swh.model.model import (
     Revision,
     SkippedContent,
     Snapshot,
+    TimestampWithTimezone,
 )
 from swh.model.tests.swh_model_data import TEST_OBJECTS
 from swh.objstorage.factory import get_objstorage
@@ -58,7 +59,7 @@ def orc_export(messages, config=None, tmpdir=None, sensitive_tmpdir=None):
             with orc.ORCExporter(config, tmpdir, sensitive_tmpdir) as exporter:
                 for object_type, objects in messages.items():
                     for obj in objects:
-                        exporter.process_object(object_type, obj.to_dict())
+                        exporter.process_object(object_type, obj)
             yield tmpdir
 
 
@@ -143,9 +144,7 @@ def test_export_release():
             orc.hash_to_hex_or_none(obj.target),
             obj.target_type.value,
             obj.author.fullname if obj.author else None,
-            *orc.swh_date_to_tuple(
-                obj.date.to_dict() if obj.date is not None else None
-            ),
+            *orc.swh_date_to_tuple(getattr(obj, "date", None)),
             obj.raw_manifest,
         ) in output[obj_type.value]
 
@@ -158,13 +157,9 @@ def test_export_revision():
             orc.hash_to_hex_or_none(obj.id),
             obj.message,
             obj.author.fullname,
-            *orc.swh_date_to_tuple(
-                obj.date.to_dict() if obj.date is not None else None
-            ),
+            *orc.swh_date_to_tuple(getattr(obj, "date", None)),
             obj.committer.fullname,
-            *orc.swh_date_to_tuple(
-                obj.committer_date.to_dict() if obj.committer_date is not None else None
-            ),
+            *orc.swh_date_to_tuple(getattr(obj, "committer_date", None)),
             orc.hash_to_hex_or_none(obj.directory),
             obj.type.value,
             obj.raw_manifest,
@@ -226,35 +221,43 @@ def test_export_skipped_content():
 
 def test_date_to_tuple():
     ts = {"seconds": 123456, "microseconds": 1515}
-    assert orc.swh_date_to_tuple({"timestamp": ts, "offset_bytes": b"+0100"}) == (
+    assert orc.swh_date_to_tuple(
+        TimestampWithTimezone.from_dict({"timestamp": ts, "offset_bytes": b"+0100"})
+    ) == (
         (123456, 1515),
         60,
         b"+0100",
     )
 
     assert orc.swh_date_to_tuple(
-        {
-            "timestamp": ts,
-            "offset": 120,
-            "negative_utc": False,
-            "offset_bytes": b"+0100",
-        }
+        TimestampWithTimezone.from_dict(
+            {
+                "timestamp": ts,
+                "offset": 120,
+                "negative_utc": False,
+                "offset_bytes": b"+0100",
+            }
+        )
     ) == ((123456, 1515), 60, b"+0100")
 
     assert orc.swh_date_to_tuple(
-        {
-            "timestamp": ts,
-            "offset": 120,
-            "negative_utc": False,
-        }
+        TimestampWithTimezone.from_dict(
+            {
+                "timestamp": ts,
+                "offset": 120,
+                "negative_utc": False,
+            }
+        )
     ) == ((123456, 1515), 120, b"+0200")
 
     assert orc.swh_date_to_tuple(
-        {
-            "timestamp": ts,
-            "offset": 0,
-            "negative_utc": True,
-        }
+        TimestampWithTimezone.from_dict(
+            {
+                "timestamp": ts,
+                "offset": 0,
+                "negative_utc": True,
+            }
+        )
     ) == (
         (123456, 1515),
         0,
diff --git a/swh/export/utils.py b/swh/export/utils.py
index 478e65c..fa8d7bc 100644
--- a/swh/export/utils.py
+++ b/swh/export/utils.py
@@ -140,8 +140,8 @@ def remove_pull_requests(snapshot: Snapshot) -> Snapshot:
     snapshot_dict = snapshot.to_dict()
     for branch_name, branch in list(snapshot.branches.items()):
         original_branch_name = branch_name
-        while branch and branch.get("target_type") == "alias":
-            branch_name = branch["target"]
+        while branch and branch.target_type.value == "alias":
+            branch_name = branch.target
             branch = snapshot.branches.get(branch_name)
         if branch is None or not branch_name:
             continue
-- 
GitLab