From e0dbae331f22c8bb149c87ab960ea4fc66af29ed Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Wed, 15 Feb 2017 17:45:59 +0100
Subject: [PATCH] identifiers: properly escape newlines in author
 specifications

Found by investigating T75
---
 swh/model/identifiers.py            | 22 ++++++++++++++-------
 swh/model/tests/test_identifiers.py | 30 +++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index c53513ae..4eb2b9d7 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -114,6 +114,18 @@ def _perms_to_bytes(perms):
     return oc.encode('ascii')
 
 
+def escape_newlines(snippet):
+    """Escape the newlines present in snippet (bytes) according to git rules.
+
+    A newline inside a git manifest value is escaped by indenting the line
+    that follows it by one space; input without newlines is returned as is."""
+
+    if b'\n' in snippet:
+        return b'\n '.join(snippet.split(b'\n'))
+    else:
+        return snippet
+
+
 def directory_identifier(directory):
     """Return the intrinsic identifier for a directory.
 
@@ -347,7 +359,7 @@ def format_author_line(header, author, date_offset):
 
     """
 
-    ret = [header.encode(), b' ', format_author(author)]
+    ret = [header.encode(), b' ', escape_newlines(format_author(author))]
 
     date_offset = normalize_timestamp(date_offset)
 
@@ -443,13 +455,9 @@ def revision_identifier(revision):
         if isinstance(value, str):
             value = value.encode('utf-8')
 
-        # multi-line values: indent continuation lines
-        if b'\n' in value:
-            value_chunks = value.split(b'\n')
-            value = b'\n '.join(value_chunks)
-
         # encode the key to utf-8
-        components.extend([key.encode('utf-8'), b' ', value, b'\n'])
+        components.extend([key.encode('utf-8'), b' ',
+                           escape_newlines(value), b'\n'])
 
     if revision['message'] is not None:
         components.extend([b'\n', revision['message']])
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
index 16a34bb9..21552950 100644
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -615,6 +615,29 @@ o6X/3T+vm8K3bf3driRr34c=
             'message': b'tagging version 20081029\n\nr56558\n',
         }
 
+        self.release_newline_in_author = {
+            'author': {
+                'email': b'esycat@gmail.com',
+                'fullname': b'Eugene Janusov\n<esycat@gmail.com>',
+                'name': b'Eugene Janusov\n',
+            },
+            'date': {
+                'negative_utc': None,
+                'offset': 600,
+                'timestamp': {
+                    'microseconds': 0,
+                    'seconds': 1377480558,
+                },
+            },
+            'id': b'\\\x98\xf5Y\xd04\x16-\xe2->\xbe\xb9T3\xe6\xf8\x88R1',
+            'message': b'Release of v0.3.2.',
+            'name': b'0.3.2',
+            'synthetic': False,
+            'target': (b'\xc0j\xa3\xd9;x\xa2\x86\\I5\x17'
+                       b'\x000\xf8\xc2\xd79o\xd3'),
+            'target_type': 'revision',
+        }
+
     @istest
     def release_identifier(self):
         self.assertEqual(
@@ -649,3 +672,10 @@ o6X/3T+vm8K3bf3driRr34c=
             identifiers.release_identifier(self.release_negative_utc),
             identifiers.identifier_to_str(self.release_negative_utc['id'])
         )
+
+    @istest
+    def release_identifier_newline_in_author(self):  # newline in fullname
+        self.assertEqual(
+            identifiers.release_identifier(self.release_newline_in_author),
+            identifiers.identifier_to_str(self.release_newline_in_author['id'])
+        )
-- 
GitLab