diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index c088bc1ae3764d12b4a57fbc6a8978214c1ab7c2..a0e1f89b1124ed9228bc763d713edffed1834ef2 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -77,21 +77,26 @@ def identifier_to_str(identifier): def content_identifier(content): """Return the intrinsic identifier for a content. - A content's identifier is the sha1 checksum of its data. + A content's identifier is the sha1, sha1_git and sha256 checksums of its + data. Args: content: a content conforming to the Software Heritage schema Returns: - The intrinsic identifier of the content as a text string. + A dictionary mapping each hash algorithm name to its checksum Raises: KeyError if the content doesn't have a data member. + """ - hashes = hashutil.hash_data(content['data'], {'sha1'}) + hashes = hashutil.hash_data( + content['data'], + {'sha1', 'sha1_git', 'sha256'}, + ) - return hashes['sha1'] + return hashes def _sort_key(entry): diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index d5537d63476ab2a823c7f12b4463f687ea8b5a24..ea45de9bc6d1c6f7b2c077b4290ea8e6a0eee603 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -96,13 +96,12 @@ class ContentIdentifier(unittest.TestCase): tzinfo=datetime.timezone.utc), } - self.content.update( - hashutil.hash_data(self.content['data'])) + self.content_id = hashutil.hash_data(self.content['data']) @istest def content_identifier(self): self.assertEqual(identifiers.content_identifier(self.content), - self.content['sha1']) + self.content_id) class DirectoryIdentifier(unittest.TestCase):