From ee678042f9bb38bf3616c250a95dbbc8077d0dca Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Tue, 19 Jul 2022 15:06:06 +0200
Subject: [PATCH] Call objstorage.get() with a HashDict instead of single hash

Hash dicts are now preferred by swh-objstorage, in order to support
individual hash collisions.
---
 swh/vault/cache.py            | 15 ++++++++-------
 swh/vault/cookers/git_bare.py |  2 +-
 swh/vault/tests/test_cache.py |  5 ++---
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/swh/vault/cache.py b/swh/vault/cache.py
index bd88b9f..488926e 100644
--- a/swh/vault/cache.py
+++ b/swh/vault/cache.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2017  The Software Heritage developers
+# Copyright (C) 2016-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -6,7 +6,7 @@
 from swh.model import hashutil
 from swh.model.swhids import CoreSWHID
 from swh.objstorage.factory import get_objstorage
-from swh.objstorage.objstorage import compute_hash
+from swh.objstorage.interface import ObjId
 
 
 class VaultCache:
@@ -25,15 +25,16 @@ class VaultCache:
 
     def get(self, bundle_type, swhid: CoreSWHID) -> bytes:
         sid = self._get_internal_id(bundle_type, swhid)
-        return self.objstorage.get(hashutil.hash_to_bytes(sid))
+        return self.objstorage.get(sid)
 
     def delete(self, bundle_type, swhid: CoreSWHID):
         sid = self._get_internal_id(bundle_type, swhid)
-        return self.objstorage.delete(hashutil.hash_to_bytes(sid))
+        return self.objstorage.delete(sid)
 
     def is_cached(self, bundle_type, swhid: CoreSWHID) -> bool:
         sid = self._get_internal_id(bundle_type, swhid)
-        return hashutil.hash_to_bytes(sid) in self.objstorage
+        return sid in self.objstorage
 
-    def _get_internal_id(self, bundle_type, swhid: CoreSWHID):
-        return compute_hash("{}:{}".format(bundle_type, swhid).encode())
+    def _get_internal_id(self, bundle_type, swhid: CoreSWHID) -> ObjId:
+        key = "{}:{}".format(bundle_type, swhid).encode()
+        return hashutil.MultiHash.from_data(key).digest()
diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
index c45b96a..4e76376 100644
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -695,7 +695,7 @@ class GitBareCooker(BaseVaultCooker):
         else:
             contents_and_data = zip(
                 visible_contents,
-                self.objstorage.get_batch(c.sha1 for c in visible_contents),
+                self.objstorage.get_batch(c.hashes() for c in visible_contents),
             )
 
         for (content, datum) in contents_and_data:
diff --git a/swh/vault/tests/test_cache.py b/swh/vault/tests/test_cache.py
index d843b8b..35f00b3 100644
--- a/swh/vault/tests/test_cache.py
+++ b/swh/vault/tests/test_cache.py
@@ -1,10 +1,9 @@
-# Copyright (C) 2017  The Software Heritage developers
+# Copyright (C) 2017-2022  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
-from swh.model import hashutil
 from swh.model.swhids import CoreSWHID
 
 TEST_TYPE_1 = "revision_gitfast"
@@ -28,7 +27,7 @@ TEST_CONTENT_2 = b"test content 2"
 
 def test_internal_id(swh_vault):
     sid = swh_vault.cache._get_internal_id(TEST_TYPE_1, TEST_SWHID_1)
-    assert hashutil.hash_to_hex(sid) == "ec2a99d6b21a68648a9d0c99c5d7c35f69268564"
+    assert sid["sha1"].hex() == "ec2a99d6b21a68648a9d0c99c5d7c35f69268564"
 
 
 def test_simple_add_get(swh_vault):
-- 
GitLab