Skip to content
Snippets Groups Projects
Commit 56891b94 authored by Nicolas Dandrimont's avatar Nicolas Dandrimont
Browse files

identifiers: make the content identifier all its hashes

parent ba2b9b5b
No related branches found
No related tags found
No related merge requests found
......@@ -77,21 +77,26 @@ def identifier_to_str(identifier):
def content_identifier(content):
    """Return the intrinsic identifier for a content.

    A content's identifier is the sha1, sha1_git and sha256 checksums of its
    data.

    Args:
        content: a content conforming to the Software Heritage schema

    Returns:
        A dictionary with all the hashes for the data

    Raises:
        KeyError if the content doesn't have a data member.

    """
    # Compute every checksum in a single hash_data call and return the whole
    # mapping, rather than extracting only the 'sha1' entry.
    return hashutil.hash_data(
        content['data'],
        {'sha1', 'sha1_git', 'sha256'},
    )
def _sort_key(entry):
......
......@@ -96,13 +96,12 @@ class ContentIdentifier(unittest.TestCase):
tzinfo=datetime.timezone.utc),
}
self.content.update(
hashutil.hash_data(self.content['data']))
self.content_id = hashutil.hash_data(self.content['data'])
@istest
def content_identifier(self):
    """Check that content_identifier returns the hashes of the content data."""
    # self.content_id is the hashutil.hash_data() result for the fixture's
    # data, so the identifier must equal that whole result, not only the
    # 'sha1' entry.
    self.assertEqual(identifiers.content_identifier(self.content),
                     self.content_id)
class DirectoryIdentifier(unittest.TestCase):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment