Skip to content
Snippets Groups Projects
Commit ee678042 authored by vlorentz's avatar vlorentz
Browse files

Call objstorage.get() with a HashDict instead of single hash

Hash dicts are now preferred by swh-objstorage, in order to support
individual hash collisions.
parent ca71a59e
No related branches found
No related tags found
1 merge request: !158 "Call objstorage.get() with a HashDict instead of single hash"
# Copyright (C) 2016-2017 The Software Heritage developers # Copyright (C) 2016-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution # See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version # License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information # See top-level LICENSE file for more information
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
from swh.model import hashutil from swh.model import hashutil
from swh.model.swhids import CoreSWHID from swh.model.swhids import CoreSWHID
from swh.objstorage.factory import get_objstorage from swh.objstorage.factory import get_objstorage
from swh.objstorage.objstorage import compute_hash from swh.objstorage.interface import ObjId
class VaultCache: class VaultCache:
...@@ -25,15 +25,16 @@ class VaultCache: ...@@ -25,15 +25,16 @@ class VaultCache:
def get(self, bundle_type, swhid: CoreSWHID) -> bytes: def get(self, bundle_type, swhid: CoreSWHID) -> bytes:
sid = self._get_internal_id(bundle_type, swhid) sid = self._get_internal_id(bundle_type, swhid)
return self.objstorage.get(hashutil.hash_to_bytes(sid)) return self.objstorage.get(sid)
def delete(self, bundle_type, swhid: CoreSWHID): def delete(self, bundle_type, swhid: CoreSWHID):
sid = self._get_internal_id(bundle_type, swhid) sid = self._get_internal_id(bundle_type, swhid)
return self.objstorage.delete(hashutil.hash_to_bytes(sid)) return self.objstorage.delete(sid)
def is_cached(self, bundle_type, swhid: CoreSWHID) -> bool: def is_cached(self, bundle_type, swhid: CoreSWHID) -> bool:
sid = self._get_internal_id(bundle_type, swhid) sid = self._get_internal_id(bundle_type, swhid)
return hashutil.hash_to_bytes(sid) in self.objstorage return sid in self.objstorage
def _get_internal_id(self, bundle_type, swhid: CoreSWHID): def _get_internal_id(self, bundle_type, swhid: CoreSWHID) -> ObjId:
return compute_hash("{}:{}".format(bundle_type, swhid).encode()) key = "{}:{}".format(bundle_type, swhid).encode()
return hashutil.MultiHash.from_data(key).digest()
...@@ -695,7 +695,7 @@ class GitBareCooker(BaseVaultCooker): ...@@ -695,7 +695,7 @@ class GitBareCooker(BaseVaultCooker):
else: else:
contents_and_data = zip( contents_and_data = zip(
visible_contents, visible_contents,
self.objstorage.get_batch(c.sha1 for c in visible_contents), self.objstorage.get_batch(c.hashes() for c in visible_contents),
) )
for (content, datum) in contents_and_data: for (content, datum) in contents_and_data:
......
# Copyright (C) 2017 The Software Heritage developers # Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution # See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version # License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information # See top-level LICENSE file for more information
from swh.model import hashutil
from swh.model.swhids import CoreSWHID from swh.model.swhids import CoreSWHID
TEST_TYPE_1 = "revision_gitfast" TEST_TYPE_1 = "revision_gitfast"
...@@ -28,7 +27,7 @@ TEST_CONTENT_2 = b"test content 2" ...@@ -28,7 +27,7 @@ TEST_CONTENT_2 = b"test content 2"
def test_internal_id(swh_vault): def test_internal_id(swh_vault):
sid = swh_vault.cache._get_internal_id(TEST_TYPE_1, TEST_SWHID_1) sid = swh_vault.cache._get_internal_id(TEST_TYPE_1, TEST_SWHID_1)
assert hashutil.hash_to_hex(sid) == "ec2a99d6b21a68648a9d0c99c5d7c35f69268564" assert sid["sha1"].hex() == "ec2a99d6b21a68648a9d0c99c5d7c35f69268564"
def test_simple_add_get(swh_vault): def test_simple_add_get(swh_vault):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment