From 7d5dd13175324cb18a3b24c726f1700c3b06d6e0 Mon Sep 17 00:00:00 2001 From: Aymeric Varasse <varasterix@softwareheritage.org> Date: Wed, 26 Mar 2025 18:17:08 +0100 Subject: [PATCH] Fix (some) tests and mistakes --- swh/export/exporters/edges.py | 4 +- swh/export/exporters/orc.py | 4 +- swh/export/test/test_edges.py | 16 +++++-- swh/export/test/test_journal_processor.py | 2 +- swh/export/test/test_orc.py | 57 ++++++++++++----------- swh/export/utils.py | 4 +- 6 files changed, 48 insertions(+), 39 deletions(-) diff --git a/swh/export/exporters/edges.py b/swh/export/exporters/edges.py index d823108..0f9c3ba 100644 --- a/swh/export/exporters/edges.py +++ b/swh/export/exporters/edges.py @@ -107,14 +107,14 @@ class GraphEdgesExporter(ExporterDispatch): self.write_node((ExtendedObjectType.SNAPSHOT, snapshot.id)) for branch_name, branch in snapshot.branches.items(): original_branch_name = branch_name - while branch and branch.target_type == "alias": + while branch and branch.target_type.value == "alias": branch_name = branch.target branch = snapshot.branches.get(branch_name) if branch is None or not branch_name: continue self.write_edge( (ExtendedObjectType.SNAPSHOT, snapshot.id), - (ExtendedObjectType[branch.target_type.name], branch.target), + (ExtendedObjectType[branch.target_type.value.upper()], branch.target), labels=[ base64.b64encode(original_branch_name).decode(), ], diff --git a/swh/export/exporters/orc.py b/swh/export/exporters/orc.py index 342cd75..ad153c1 100644 --- a/swh/export/exporters/orc.py +++ b/swh/export/exporters/orc.py @@ -332,7 +332,7 @@ class ORCExporter(ExporterDispatch): ), *swh_date_to_tuple(revision.committer_date), hash_to_hex_or_none(revision.directory), - revision.type.name, + revision.type.value, revision.raw_manifest, ) ) @@ -386,7 +386,7 @@ class ORCExporter(ExporterDispatch): data = None if self.with_data: try: - data = self.objstorage.get(content) + data = self.objstorage.get(content.to_dict()) except ObjNotFoundError: # WARNING: I'm not sure this is right logger.warning( diff --git a/swh/export/test/test_edges.py b/swh/export/test/test_edges.py index 4512415..be00a57 100644 --- a/swh/export/test/test_edges.py +++ b/swh/export/test/test_edges.py @@ -5,6 +5,7 @@ from base64 import b64encode import collections +from datetime import datetime import hashlib from typing import Tuple from unittest.mock import Mock, call @@ -12,6 +13,7 @@ from unittest.mock import Mock, call import pytest from swh.export.exporters.edges import GraphEdgesExporter, sort_graph_nodes +from swh.export.journalprocessor import _turn_message_into_objects from swh.export.utils import ZSTFile from swh.model.hashutil import MultiHash, hash_to_bytes from swh.model.model import ModelObjectType @@ -50,7 +52,7 @@ TEST_RELEASE = { "offset": 120, "negative_utc": False, }, - "author": {"author": {"fullname": b"foo", "name": b"foo", "email": b""}}, + "author": {"fullname": b"foo", "name": b"foo", "email": b""}, "target_type": "revision", "target": b"\x04" * 20, "message": b"foo", @@ -63,7 +65,7 @@ TEST_ORIGIN_2 = {"url": "https://somewhere.org/den/fox/2"} TEST_ORIGIN_VISIT_STATUS = { "origin": TEST_ORIGIN["url"], "visit": 1, - "date": "2013-05-07 04:20:39.369271+00:00", + "date": datetime.fromisoformat("2013-05-07 04:20:39.369271+00:00"), "snapshot": None, # TODO "status": "ongoing", # TODO "metadata": {"foo": "bar"}, @@ -87,7 +89,9 @@ class FakeDiskSet(set): @pytest.fixture def exporter(): - def wrapped(messages, config=None) -> Tuple[Mock, Mock]: + def wrapped( + messages: dict[ModelObjectType, list[dict]], config=None + ) -> Tuple[Mock, Mock]: if config is None: config = {} exporter = GraphEdgesExporter(config, "/dummy_path", "/dummy_sensitive_path") @@ -97,8 +101,10 @@ def exporter(): node_writer, edge_writer, ) - for object_type, objects in messages.items(): - for obj in objects: + for object_type, message_list in messages.items(): + for message in message_list: + obj = _turn_message_into_objects(object_type.value, (b"", message))[1] + assert obj is not None exporter.process_object(object_type, obj) return node_writer.write, edge_writer.write diff --git a/swh/export/test/test_journal_processor.py b/swh/export/test/test_journal_processor.py index 9a5279e..8abe38b 100644 --- a/swh/export/test/test_journal_processor.py +++ b/swh/export/test/test_journal_processor.py @@ -74,7 +74,7 @@ def assert_exported_objects( def key(obj): """bare minimum to get a deterministic order""" return (obj[0],) + tuple( - obj[1].get(k) for k in ("id", "url", "origin", "visit", "date") + getattr(obj[1], k, None) for k in ("id", "url", "origin", "visit", "date") ) assert sorted(exported_objects, key=key) == sorted( diff --git a/swh/export/test/test_orc.py b/swh/export/test/test_orc.py index 785bc3c..211de6b 100644 --- a/swh/export/test/test_orc.py +++ b/swh/export/test/test_orc.py @@ -26,6 +26,7 @@ from swh.model.model import ( Revision, SkippedContent, Snapshot, + TimestampWithTimezone, ) from swh.model.tests.swh_model_data import TEST_OBJECTS from swh.objstorage.factory import get_objstorage @@ -58,7 +59,7 @@ def orc_export(messages, config=None, tmpdir=None, sensitive_tmpdir=None): with orc.ORCExporter(config, tmpdir, sensitive_tmpdir) as exporter: for object_type, objects in messages.items(): for obj in objects: - exporter.process_object(object_type, obj.to_dict()) + exporter.process_object(object_type, obj) yield tmpdir @@ -143,9 +144,7 @@ def test_export_release(): orc.hash_to_hex_or_none(obj.target), obj.target_type.value, obj.author.fullname if obj.author else None, - *orc.swh_date_to_tuple( - obj.date.to_dict() if obj.date is not None else None - ), + *orc.swh_date_to_tuple(getattr(obj, "date", None)), obj.raw_manifest, ) in output[obj_type.value] @@ -158,13 +157,9 @@ def test_export_revision(): orc.hash_to_hex_or_none(obj.id), obj.message, obj.author.fullname, - *orc.swh_date_to_tuple( - obj.date.to_dict() if obj.date is not None else None - ), + *orc.swh_date_to_tuple(getattr(obj, "date", None)), obj.committer.fullname, - *orc.swh_date_to_tuple( - obj.committer_date.to_dict() if obj.committer_date is not None else None - ), + *orc.swh_date_to_tuple(getattr(obj, "committer_date", None)), orc.hash_to_hex_or_none(obj.directory), obj.type.value, obj.raw_manifest, @@ -226,35 +221,43 @@ def test_export_skipped_content(): def test_date_to_tuple(): ts = {"seconds": 123456, "microseconds": 1515} - assert orc.swh_date_to_tuple({"timestamp": ts, "offset_bytes": b"+0100"}) == ( + assert orc.swh_date_to_tuple( + TimestampWithTimezone.from_dict({"timestamp": ts, "offset_bytes": b"+0100"}) + ) == ( (123456, 1515), 60, b"+0100", ) assert orc.swh_date_to_tuple( - { - "timestamp": ts, - "offset": 120, - "negative_utc": False, - "offset_bytes": b"+0100", - } + TimestampWithTimezone.from_dict( + { + "timestamp": ts, + "offset": 120, + "negative_utc": False, + "offset_bytes": b"+0100", + } + ) ) == ((123456, 1515), 60, b"+0100") assert orc.swh_date_to_tuple( - { - "timestamp": ts, - "offset": 120, - "negative_utc": False, - } + TimestampWithTimezone.from_dict( + { + "timestamp": ts, + "offset": 120, + "negative_utc": False, + } + ) ) == ((123456, 1515), 120, b"+0200") assert orc.swh_date_to_tuple( - { - "timestamp": ts, - "offset": 0, - "negative_utc": True, - } + TimestampWithTimezone.from_dict( + { + "timestamp": ts, + "offset": 0, + "negative_utc": True, + } + ) ) == ( (123456, 1515), 0, diff --git a/swh/export/utils.py b/swh/export/utils.py index 478e65c..fa8d7bc 100644 --- a/swh/export/utils.py +++ b/swh/export/utils.py @@ -140,8 +140,8 @@ def remove_pull_requests(snapshot: Snapshot) -> Snapshot: snapshot_dict = snapshot.to_dict() for branch_name, branch in list(snapshot.branches.items()): original_branch_name = branch_name - while branch and branch.get("target_type") == "alias": - branch_name = branch["target"] + while branch and branch.target_type.value == "alias": + branch_name = branch.target branch = snapshot.branches.get(branch_name) if branch is None or not branch_name: continue -- GitLab