From fbd4e6775de7a48f4e526f1baf9b6117f07be307 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Tue, 22 Mar 2016 15:25:15 +0100
Subject: [PATCH] Use of optional extra-headers in git commit sha1 computation

---
 README-dev.md                       | 19 ++++++++++++++++
 swh/model/identifiers.py            | 19 ++++++++++++----
 swh/model/tests/test_identifiers.py | 35 +++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/README-dev.md b/README-dev.md
index aeae180f..ac2abc83 100644
--- a/README-dev.md
+++ b/README-dev.md
@@ -29,6 +29,25 @@ sources:
 - commit_tree_extended: https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/commit.c#L1522
 - commit_tree: https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/commit.c#L1392
 
+Example:
+
+```sh
+$ cat commit.txt
+tree 85a74718d377195e1efd0843ba4f3260bad4fe07
+parent 01e2d0627a9a6edb24c37db45db5ecb31e9de808
+author Linus Torvalds <torvalds@linux-foundation.org> 1436739030 -0700
+committer Linus Torvalds <torvalds@linux-foundation.org> 1436739030 -0700
+svn-repo-uuid 046f1af7-66c2-d61b-5410-ce57b7db7bff
+svn-revision 10
+
+Linux 4.2-rc2
+```
+
+```sh
+$ cat commit.txt | git hash-object -t commit --stdin
+010d34f384fa99d047cdd5e2f41e56e5c2feee45
+```
+
 ### directory/tree
 
 sha1 git directory/tree computation:
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index ca255a81..55eac96e 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -262,12 +262,23 @@ def revision_identifier(revision):
         b' ', format_date_offset(revision['date']), b'\n',
         b'committer ', format_author(revision['committer']),
         b' ', format_date_offset(revision['committer_date']), b'\n',
-        b'\n',
-        revision['message'],
     ])
 
-    return identifier_to_str(hashutil.hash_git_data(b''.join(components),
-                                                    'commit'))
+    metadata = revision.get('metadata', {})
+    if 'extra-headers' in metadata:
+        headers = metadata['extra-headers']
+        keys = list(headers.keys())
+        keys.sort()
+        for header_key in keys:
+            val = headers[header_key]
+            if isinstance(val, int):
+                val = str(val).encode('utf-8')
+            components.extend([header_key, b' ', val, b'\n'])
+
+    components.extend([b'\n', revision['message']])
+
+    commit_raw = b''.join(components)
+    return identifier_to_str(hashutil.hash_git_data(commit_raw, 'commit'))
 
 
 def target_type_to_git(target_type):
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index da221df0..6bbff3f6 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -270,6 +270,32 @@ class RevisionIdentifier(unittest.TestCase):
 
         }
 
+        # id computed with: cat commit.txt | git hash-object -t commit --stdin
+        self.revision_with_extra_headers = {
+            'id': '010d34f384fa99d047cdd5e2f41e56e5c2feee45',
+            'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07',
+            'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'],
+            'author': {
+                'name': b'Linus Torvalds',
+                'email': b'torvalds@linux-foundation.org',
+            },
+            'date': datetime.datetime(2015, 7, 12, 15, 10, 30,
+                                      tzinfo=linus_tz),
+            'committer': {
+                'name': b'Linus Torvalds',
+                'email': b'torvalds@linux-foundation.org',
+            },
+            'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30,
+                                                tzinfo=linus_tz),
+            'message': b'Linux 4.2-rc2\n',
+            'metadata': {
+                'extra-headers': {
+                    b'svn-revision': 10,
+                    b'svn-repo-uuid': b'046f1af7-66c2-d61b-5410-ce57b7db7bff',
+                }
+            }
+        }
+
     @istest
     def revision_identifier(self):
         self.assertEqual(
@@ -284,6 +310,15 @@ class RevisionIdentifier(unittest.TestCase):
             identifiers.identifier_to_str(self.synthetic_revision['id']),
         )
 
+    @istest
+    def revision_identifier_with_extra_headers(self):
+        self.assertEqual(
+            identifiers.revision_identifier(
+                self.revision_with_extra_headers),
+            identifiers.identifier_to_str(
+                self.revision_with_extra_headers['id']),
+        )
+
 
 class ReleaseIdentifier(unittest.TestCase):
     def setUp(self):
-- 
GitLab