From e4566a6605ff7896a1701892faac3f2ec9ea7d10 Mon Sep 17 00:00:00 2001
From: Daniele Serafini <me@danieleserafini.eu>
Date: Fri, 18 Jun 2021 15:45:10 +0100
Subject: [PATCH] from_disk: get swhid from Content/Directory objects

Closes T3393
---
 swh/model/cli.py                  | 28 ++++++----------------------
 swh/model/from_disk.py            | 21 ++++++++++++++++++++-
 swh/model/tests/test_from_disk.py | 16 ++++++++++++++++
 3 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/swh/model/cli.py b/swh/model/cli.py
index 7e497608..e547aeb6 100644
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -57,22 +57,16 @@ class CoreSWHIDParamType(click.ParamType):
 
 def swhid_of_file(path) -> CoreSWHID:
     from swh.model.from_disk import Content
-    from swh.model.hashutil import hash_to_bytes
 
-    object = Content.from_file(path=path).get_data()
-    return CoreSWHID(
-        object_type=ObjectType.CONTENT, object_id=hash_to_bytes(object["sha1_git"])
-    )
+    # Content.swhid() wraps the node hash in a CONTENT-typed CoreSWHID.
+    return Content.from_file(path=path).swhid()
 
 
 def swhid_of_file_content(data) -> CoreSWHID:
     from swh.model.from_disk import Content
-    from swh.model.hashutil import hash_to_bytes
 
-    object = Content.from_bytes(mode=644, data=data).get_data()
-    return CoreSWHID(
-        object_type=ObjectType.CONTENT, object_id=hash_to_bytes(object["sha1_git"])
-    )
+    # File modes are octal: the old decimal 644 equals 0o1204, not rw-r--r--.
+    return Content.from_bytes(mode=0o644, data=data).swhid()
 
 
 def model_of_dir(path: bytes, exclude_patterns: Iterable[bytes] = None) -> Directory:
@@ -88,13 +82,8 @@ def model_of_dir(path: bytes, exclude_patterns: Iterable[bytes] = None) -> Direc
 
 
 def swhid_of_dir(path: bytes, exclude_patterns: Iterable[bytes] = None) -> CoreSWHID:
-    from swh.model.hashutil import hash_to_bytes
-
     obj = model_of_dir(path, exclude_patterns)
-
-    return CoreSWHID(
-        object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(obj.get_data()["id"])
-    )
+    return obj.swhid()  # Directory.swhid(): DIRECTORY-typed SWHID of obj.hash
 
 
 def swhid_of_origin(url):
@@ -301,12 +290,7 @@ def identify(
         for sub_obj in dir_obj.iter_tree():
             path_name = "path" if "path" in sub_obj.data.keys() else "data"
             path = os.fsdecode(sub_obj.data[path_name])
-            swhid = str(
-                CoreSWHID(
-                    object_type=ObjectType[sub_obj.object_type.upper()],
-                    object_id=sub_obj.hash,
-                )
-            )
+            swhid = str(sub_obj.swhid())
             msg = f"{swhid}\t{path}" if show_filename else f"{swhid}"
             click.echo(msg)
     else:
diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index 78dc1742..f9c836ff 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -19,7 +19,12 @@ from typing_extensions import Final
 from . import model
 from .exceptions import InvalidDirectoryPath
 from .hashutil import MultiHash
-from .identifiers import directory_entry_sort_key, directory_identifier
+from .identifiers import (
+    CoreSWHID,
+    ObjectType,
+    directory_entry_sort_key,
+    directory_identifier,
+)
 from .identifiers import identifier_to_bytes as id_to_bytes
 from .identifiers import identifier_to_str as id_to_str
 from .merkle import MerkleLeaf, MerkleNode
@@ -207,6 +212,13 @@ class Content(MerkleLeaf):
         obj = cls(ret)
         return obj
 
+    def swhid(self) -> CoreSWHID:
+        """Return this content's identifier as a core SWHID.
+
+        The SWHID has type ``ObjectType.CONTENT`` and wraps ``self.hash``.
+        """
+        return CoreSWHID(object_type=ObjectType.CONTENT, object_id=self.hash)
+
     def __repr__(self):
         return "Content(id=%s)" % id_to_str(self.hash)
 
@@ -482,6 +494,13 @@ class Directory(MerkleNode):
 
         return self.__entries
 
+    def swhid(self) -> CoreSWHID:
+        """Return this directory's identifier as a core SWHID.
+
+        The SWHID has type ``ObjectType.DIRECTORY`` and wraps ``self.hash``.
+        """
+        return CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=self.hash)
+
     def compute_hash(self):
         return id_to_bytes(directory_identifier({"entries": self.entries}))
 
diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py
index 497bf6c0..4a7cb385 100644
--- a/swh/model/tests/test_from_disk.py
+++ b/swh/model/tests/test_from_disk.py
@@ -531,6 +531,22 @@ class TestContent(DataMixin, unittest.TestCase):
             self.assertContentEqual(conv_content, content)
             self.assertIn(hash_to_hex(conv_content.hash), repr(conv_content))
 
+    def test_content_swhid(self):
+        for content in self.contents.values():  # fixture keys are not needed
+            content_res = Content.from_bytes(mode=content["mode"], data=content["data"])
+            content_swhid = "swh:1:cnt:" + hash_to_hex(content["sha1_git"])
+            self.assertEqual(str(content_res.swhid()), content_swhid)
+
+
+class TestDirectory(DataMixin, unittest.TestCase):
+    """SWHID checks for Directory, relying on fixtures inherited via DataMixin."""
+
+    def test_directory_swhid(self):
+        # NOTE(review): assumes self.tmpdir_name is still empty - TODO confirm.
+        directory_swhid = "swh:1:dir:" + hash_to_hex(self.empty_directory["id"])
+        directory = Directory.from_disk(path=self.tmpdir_name)
+        self.assertEqual(str(directory.swhid()), directory_swhid)
+
 
 class SymlinkToContent(DataMixin, unittest.TestCase):
     def setUp(self):
-- 
GitLab