From 9c25f8f587b62c90a9142a0a0fb6899d0c9b6240 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com>
Date: Tue, 14 Mar 2017 15:15:06 +0100
Subject: [PATCH] swh.model.hashutil: Open variable length hash algorithm
 support

The caveat is that this is only supported when running on
python3 >= 3.6 (hashlib gained blake2b/blake2s in Python 3.6).

Related T692
---
 swh/model/hashutil.py            | 25 +++++++++++++++---
 swh/model/tests/test_hashutil.py | 45 ++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py
index ea28414f..0075e632 100644
--- a/swh/model/hashutil.py
+++ b/swh/model/hashutil.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015  The Software Heritage developers
+# Copyright (C) 2015-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -6,8 +6,10 @@
 import binascii
 import functools
 import hashlib
-from io import BytesIO
 import os
+import sys
+
+from io import BytesIO
 
 # supported hashing algorithms
 ALGORITHMS = set(['sha1', 'sha256', 'sha1_git'])
@@ -64,7 +66,7 @@ def _new_hash(algo, length=None):
         ValueError if algo is unknown, or length is missing for a git-specific
         hash.
     """
-    if algo not in ALGORITHMS:
+    if algo not in ALGORITHMS and ':' not in algo:
         raise ValueError('Unexpected hashing algorithm %s, '
                          'expected one of %s' %
                          (algo, ', '.join(sorted(ALGORITHMS))))
@@ -75,6 +77,23 @@ def _new_hash(algo, length=None):
             raise ValueError('Missing length for git hashing algorithm')
         base_algo = algo[:-4]
         h = _new_git_hash(base_algo, 'blob', length)
+    elif ':' in algo:   # variable length hashing algorithms (only from
+                        # python3 >= 3.6)
+        if sys.version_info.major == 3 and sys.version_info.minor >= 6:
+            _algo = algo.split(':')
+            base_algo = _algo[0]
+            variable_length = int(_algo[1])
+
+            if base_algo == 'blake2b':
+                h = hashlib.blake2b(digest_size=variable_length)
+            elif base_algo == 'blake2s':
+                h = hashlib.blake2s(digest_size=variable_length)
+            else:
+                raise ValueError('Unexpected hashing algorithm %s, '
+                                 'expected one of %s' %
+                                 (algo, ', '.join(sorted(ALGORITHMS))))
+        else:
+            raise ValueError('Unsupported hashing algorithm %s' % algo)
     else:
         h = hashlib.new(algo)
 
diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py
index 614e7ee2..afa5a588 100644
--- a/swh/model/tests/test_hashutil.py
+++ b/swh/model/tests/test_hashutil.py
@@ -8,6 +8,7 @@ import tempfile
 import unittest
 
 from nose.tools import istest
+from unittest.mock import MagicMock, patch
 
 from swh.model import hashutil
 
@@ -124,6 +125,50 @@ class Hashutil(unittest.TestCase):
                              hashutil.bytehex_to_hash(
                                  self.hex_checksums[algo].encode()))
 
+    @patch('swh.model.hashutil.sys')
+    @istest
+    def new_hash_unsupported_hashing_algorithm(self, mock_sys):
+        mock_sys.version_info = MagicMock(major=3, minor=5)  # pre-3.6
+        with self.assertRaisesRegex(
+                ValueError, '^Unsupported hashing algorithm blake2:10$'):
+            hashutil._new_hash('blake2:10')
+
+    @patch('swh.model.hashutil.sys')
+    @istest
+    def new_hash_unexpected_hashing_algo(self, mock_sys):
+        mock_sys.version_info = MagicMock(major=3, minor=6)
+        # assertRaises fails the test if no ValueError is raised at all
+        with self.assertRaises(ValueError) as cm:
+            hashutil._new_hash('blake3:256')
+
+        self.assertEquals(str(cm.exception),
+                          'Unexpected hashing algorithm blake3:256, '
+                          'expected one of sha1, sha1_git, sha256')
+
+    @patch('swh.model.hashutil.sys')
+    @patch('swh.model.hashutil.hashlib')
+    @istest
+    def new_hash_blake2b(self, mock_hashlib, mock_sys):
+        # Simulate python 3.6 and stub out hashlib's blake2b constructor.
+        mock_hashlib.blake2b.return_value = 'some-hashlib-object'
+        mock_sys.version_info = MagicMock(major=3, minor=6)
+
+        actual_hash = hashutil._new_hash('blake2b:256')
+        mock_hashlib.blake2b.assert_called_with(digest_size=256)
+        self.assertEquals(actual_hash, 'some-hashlib-object')
+
+    @patch('swh.model.hashutil.sys')
+    @patch('swh.model.hashutil.hashlib')
+    @istest
+    def new_hash_blake2s(self, mock_hashlib, mock_sys):
+        # Simulate python 3.6 and stub out hashlib's blake2s constructor.
+        mock_hashlib.blake2s.return_value = 'some-hashlib-object'
+        mock_sys.version_info = MagicMock(major=3, minor=6)
+
+        actual_hash = hashutil._new_hash('blake2s:128')
+        mock_hashlib.blake2s.assert_called_with(digest_size=128)
+        self.assertEquals(actual_hash, 'some-hashlib-object')
+
 
 class HashlibGit(unittest.TestCase):
 
-- 
GitLab