From 56891b943a20b13e68af25aa52939435fc5640b4 Mon Sep 17 00:00:00 2001 From: Nicolas Dandrimont <nicolas@dandrimont.eu> Date: Mon, 30 Nov 2015 11:20:14 +0100 Subject: [PATCH] identifiers: make the content identifier all its hashes --- swh/model/identifiers.py | 13 +++++++++---- swh/model/tests/test_identifiers.py | 5 ++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index c088bc1a..a0e1f89b 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -77,21 +77,26 @@ def identifier_to_str(identifier): def content_identifier(content): """Return the intrinsic identifier for a content. - A content's identifier is the sha1 checksum of its data. + A content's identifier is the sha1, sha1_git and sha256 checksums of its + data. Args: content: a content conforming to the Software Heritage schema Returns: - The intrinsic identifier of the content as a text string. + A dictionary with all the hashes for the data Raises: KeyError if the content doesn't have a data member. + """ - hashes = hashutil.hash_data(content['data'], {'sha1'}) + hashes = hashutil.hash_data( + content['data'], + {'sha1', 'sha1_git', 'sha256'}, + ) - return hashes['sha1'] + return hashes def _sort_key(entry): diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index d5537d63..ea45de9b 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -96,13 +96,12 @@ class ContentIdentifier(unittest.TestCase): tzinfo=datetime.timezone.utc), } - self.content.update( - hashutil.hash_data(self.content['data'])) + self.content_id = hashutil.hash_data(self.content['data']) @istest def content_identifier(self): self.assertEqual(identifiers.content_identifier(self.content), - self.content['sha1']) + self.content_id) class DirectoryIdentifier(unittest.TestCase): -- GitLab