diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index c226dfef9deac339dff590211947174e2226d317..90f3db0ae46af7850ad5030b8cd5fead4c53f0e1 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -246,6 +246,53 @@ def format_author(author): def revision_identifier(revision): """Return the intrinsic identifier for a revision. + + The fields used for the revision identifier computation are: + - directory + - parents + - author + - author_date + - committer + - committer_date + - metadata -> extra_headers + - message + + A revision's identifier is the 'git'-checksum of a commit manifest + constructed as follows (newlines are a single ASCII newline character): + + ``` + tree <directory identifier> + [for each parent in parents] + parent <parent identifier> + [end for each parents] + author <author> <author_date> + committer <committer> <committer_date> + [for each key, value in extra_headers] + <key> <encoded value> + [end for each extra_headers] + + <message> + ``` + + The directory identifier is the ascii representation of its hexadecimal + encoding. + + Author and committer are formatted with the `format_author` function. + Dates are formatted with the `format_date_offset` function. + + Extra headers are an ordered list of [key, value] pairs. Keys are strings + and get encoded to utf-8 for identifier computation. Values are either byte + strings, unicode strings (that get encoded to utf-8), or integers (that get + encoded to their utf-8 decimal representation). + + Multiline extra header values are escaped by indenting the continuation + lines with one ascii space. + + The headers are separated from the commit message with an empty line. + + The checksum of the full manifest is computed using the 'commit' git object + type. + """ components = [ b'tree ', identifier_to_str(revision['directory']).encode(), b'\n', @@ -263,29 +310,25 @@ def revision_identifier(revision): b' ', format_date_offset(revision['committer_date']), b'\n', ]) + # Handle extra headers metadata = revision.get('metadata', {}) - if 'gpgsig' in metadata: - gpgsig = metadata['gpgsig'] - if isinstance(gpgsig, str): - gpgsig = gpgsig.encode('utf-8') - components.extend([b'gpgsig', b' ', gpgsig, b'\n']) - - if 'extra_headers' in metadata: - headers = metadata['extra_headers'] - keys = list(headers.keys()) - keys.sort() - for header_key in keys: - val = headers[header_key] - if isinstance(val, int): - val = str(val).encode('utf-8') - if isinstance(val, str): - val = val.encode('utf-8') - if isinstance(header_key, str): - key = header_key.encode('utf-8') - else: - key = header_key - - components.extend([key, b' ', val, b'\n']) + for key, value in metadata.get('extra_headers', []): + + # Integer values: decimal representation + if isinstance(value, int): + value = str(value).encode('utf-8') + + # Unicode string values: utf-8 encoding + if isinstance(value, str): + value = value.encode('utf-8') + + # multi-line values: indent continuation lines + if b'\n' in value: + value_chunks = value.split(b'\n') + value = b'\n '.join(value_chunks) + + # encode the key to utf-8 + components.extend([key.encode('utf-8'), b' ', value, b'\n']) components.extend([b'\n', revision['message']]) diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 5993313fc30e7470a8be3629c13d1d219097056c..834b923b27db32a711f0c5a54d7230181d49b33d 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -220,22 +220,22 @@ class RevisionIdentifier(unittest.TestCase): linus_tz = datetime.timezone(datetime.timedelta(minutes=-420)) linus_gpgsig = b'''\ -----BEGIN PGP SIGNATURE----- - Version: GnuPG v1.4.13 (Darwin) -\x20 - iQIcBAABAgAGBQJVJcYsAAoJEBiY3kIkQRNJVAUQAJ8/XQIfMqqC5oYeEFfHOPYZ - L7qy46bXHVBa9Qd8zAJ2Dou3IbI2ZoF6/Et89K/UggOycMlt5FKV/9toWyuZv4Po - L682wonoxX99qvVTHo6+wtnmYO7+G0f82h+qHMErxjP+I6gzRNBvRr+SfY7VlGdK - wikMKOMWC5smrScSHITnOq1Ews5pe3N7qDYMzK0XVZmgDoaem4RSWMJs4My/qVLN - e0CqYWq2A22GX7sXl6pjneJYQvcAXUX+CAzp24QnPSb+Q22Guj91TcxLFcHCTDdn - qgqMsEyMiisoglwrCbO+D+1xq9mjN9tNFWP66SQ48mrrHYTBV5sz9eJyDfroJaLP - CWgbDTgq6GzRMehHT3hXfYS5NNatjnhkNISXR7pnVP/obIi/vpWh5ll6Gd8q26z+ - a/O41UzOaLTeNI365MWT4/cnXohVLRG7iVJbAbCxoQmEgsYMRc/pBAzWJtLfcB2G - jdTswYL6+MUdL8sB9pZ82D+BP/YAdHe69CyTu1lk9RT2pYtI/kkfjHubXBCYEJSG - +VGllBbYG6idQJpyrOYNRJyrDi9yvDJ2W+S0iQrlZrxzGBVGTB/y65S8C+2WTBcE - lf1Qb5GDsQrZWgD+jtWTywOYHtCBwyCKSAXxSARMbNPeak9WPlcW/Jmu+fUcMe2x - dg1KdHOa34shrKDaOVzW - =od6m - -----END PGP SIGNATURE-----''' +Version: GnuPG v1.4.13 (Darwin) + +iQIcBAABAgAGBQJVJcYsAAoJEBiY3kIkQRNJVAUQAJ8/XQIfMqqC5oYeEFfHOPYZ +L7qy46bXHVBa9Qd8zAJ2Dou3IbI2ZoF6/Et89K/UggOycMlt5FKV/9toWyuZv4Po +L682wonoxX99qvVTHo6+wtnmYO7+G0f82h+qHMErxjP+I6gzRNBvRr+SfY7VlGdK +wikMKOMWC5smrScSHITnOq1Ews5pe3N7qDYMzK0XVZmgDoaem4RSWMJs4My/qVLN +e0CqYWq2A22GX7sXl6pjneJYQvcAXUX+CAzp24QnPSb+Q22Guj91TcxLFcHCTDdn +qgqMsEyMiisoglwrCbO+D+1xq9mjN9tNFWP66SQ48mrrHYTBV5sz9eJyDfroJaLP +CWgbDTgq6GzRMehHT3hXfYS5NNatjnhkNISXR7pnVP/obIi/vpWh5ll6Gd8q26z+ +a/O41UzOaLTeNI365MWT4/cnXohVLRG7iVJbAbCxoQmEgsYMRc/pBAzWJtLfcB2G +jdTswYL6+MUdL8sB9pZ82D+BP/YAdHe69CyTu1lk9RT2pYtI/kkfjHubXBCYEJSG ++VGllBbYG6idQJpyrOYNRJyrDi9yvDJ2W+S0iQrlZrxzGBVGTB/y65S8C+2WTBcE +lf1Qb5GDsQrZWgD+jtWTywOYHtCBwyCKSAXxSARMbNPeak9WPlcW/Jmu+fUcMe2x +dg1KdHOa34shrKDaOVzW +=od6m +-----END PGP SIGNATURE-----''' self.revision = { 'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590', @@ -307,10 +307,10 @@ class RevisionIdentifier(unittest.TestCase): tzinfo=linus_tz), 'message': b'Linux 4.2-rc2\n', 'metadata': { - 'extra_headers': { - 'svn-revision': 10, - 'svn-repo-uuid': '046f1af7-66c2-d61b-5410-ce57b7db7bff', - } + 'extra_headers': [ + ['svn-repo-uuid', '046f1af7-66c2-d61b-5410-ce57b7db7bff'], + ['svn-revision', 10], + ] } } @@ -336,7 +336,9 @@ class RevisionIdentifier(unittest.TestCase): 'offset': 480, }, 'metadata': { - 'gpgsig': linus_gpgsig, + 'extra_headers': [ + ['gpgsig', linus_gpgsig], + ], }, 'message': b'''Merge branch 'master' of git://github.com/alexhenrie/git-po