Skip to content
Snippets Groups Projects
Commit 90f08eea authored by Nicolas Dandrimont's avatar Nicolas Dandrimont
Browse files

Update blake2 support to be less Debian-specific

Summary:
We used to depend on the 'blake2s256' and 'blake2b512' names being available in
hashlib.algorithms_available. It turns out that those names are specific to OpenSSL 1.1.

We now try, in order:
 - blake2s256/blake2b512 as shipped by libssl1.1 (Python 3.5+ on Debian stretch
 and up)
 - blake2s/blake2b as built into Python 3.6+
 - pyblake2 if all else fails

While we're here, let's also stop doing not-so-subtle hacks with hashlib
internals (injecting constructors into hashlib.__builtin_constructor_cache).

Thanks to Alexios Zavras for the report.

Test Plan:
New unit tests added to check for behavior in all cases. Manually running the
tests on Python 3.4 + pyblake2, Python 3.5 and Python 3.6 as shipped by Debian
exercises all three cases.

Reviewers: zack, #reviewers!

Differential Revision: https://forge.softwareheritage.org/D347
parent 0d5bc177
No related branches found
No related tags found
No related merge requests found
...@@ -17,9 +17,21 @@ def parse_requirements(): ...@@ -17,9 +17,21 @@ def parse_requirements():
extra_requirements = [] extra_requirements = []
pyblake2_hash_sets = [
pyblake2_hashes = {'blake2s256', 'blake2b512'} # Built-in implementation in Python 3.6+
if pyblake2_hashes - set(hashlib.algorithms_available): {'blake2s', 'blake2b'},
# Potentially shipped by OpenSSL 1.1 (e.g. Python 3.5 in Debian stretch
# has these)
{'blake2s256', 'blake2b512'},
]
for pyblake2_hashes in pyblake2_hash_sets:
if not pyblake2_hashes - set(hashlib.algorithms_available):
# The required blake2 hashes have been found
break
else:
# None of the possible sets of blake2 hashes are available.
# use pyblake2 instead
extra_requirements.append('pyblake2') extra_requirements.append('pyblake2')
setup( setup(
......
...@@ -43,15 +43,61 @@ Subset of :const:`ALGORITHMS`. ...@@ -43,15 +43,61 @@ Subset of :const:`ALGORITHMS`.
HASH_BLOCK_SIZE = 32768 HASH_BLOCK_SIZE = 32768
"""Block size for streaming hash computations made in this module""" """Block size for streaming hash computations made in this module"""
# Zero-argument constructors for the blake2 variants resolved so far, keyed by
# the algorithm name exactly as the caller spelled it.
_blake2_hash_cache = {}


def _new_blake2_hash(algo):
    """Return a new, empty hash object for the blake2 algorithm ``algo``.

    ``algo`` is matched case-insensitively and must look like
    ``blake2b[<bits>]`` or ``blake2s[<bits>]``, where the optional ``<bits>``
    suffix is the digest length in bits (e.g. ``blake2b512``, ``blake2s256``).

    The implementation is looked up in this order:

    1. ``hashlib.new(<lowercased name>)`` when that exact name is listed in
       ``hashlib.algorithms_available`` (the case where OpenSSL ships it);
    2. ``hashlib.blake2b`` / ``hashlib.blake2s`` when the family name is
       listed in ``hashlib.algorithms_available``;
    3. the same-named constructor from ``pyblake2``, imported lazily.

    The constructor that was selected is stored in ``_blake2_hash_cache`` so
    that later calls with the same ``algo`` skip the parsing and lookup.

    Args:
        algo (str): name of the blake2 variant to instantiate

    Returns:
        a freshly initialized hash object

    Raises:
        ValueError: if ``algo`` is not a blake2 name, if its size suffix is
            not an integer or not a multiple of 8, or if the family is
            neither ``blake2b`` nor ``blake2s``
        ImportError: if neither hashlib lookup succeeds and ``pyblake2`` is
            not installed
    """
    cached = _blake2_hash_cache.get(algo)
    if cached is not None:
        return cached()

    lalgo = algo.lower()
    if not lalgo.startswith('blake2'):
        raise ValueError('Algorithm %s is not a blake2 hash' % algo)

    blake_family, size_suffix = lalgo[:7], lalgo[7:]

    # The suffix is expressed in bits; the constructors expect bytes.
    digest_size = None
    if size_suffix:
        try:
            digest_size, remainder = divmod(int(size_suffix), 8)
        except ValueError:
            raise ValueError(
                'Unknown digest size for algo %s' % algo
            ) from None
        if remainder:
            raise ValueError(
                'Digest size for algorithm %s must be a multiple of 8' % algo
            )

    if lalgo in hashlib.algorithms_available:
        # Handle the case where OpenSSL ships the given algorithm
        # (e.g. Python 3.5 on Debian 9 stretch)
        def factory():
            return hashlib.new(lalgo)
    else:
        # Fail with the documented exception type instead of letting an
        # unknown family surface as an ImportError or AttributeError below.
        if blake_family not in ('blake2b', 'blake2s'):
            raise ValueError('Algorithm %s is not a blake2 hash' % algo)

        # Try using the built-in implementation for Python 3.6+
        if blake_family in hashlib.algorithms_available:
            blake2 = getattr(hashlib, blake_family)
        else:
            import pyblake2
            blake2 = getattr(pyblake2, blake_family)

        if digest_size is None:
            # No size suffix: let the implementation use its own default
            # rather than passing digest_size=None, which is not an integer.
            factory = blake2
        else:
            def factory():
                return blake2(digest_size=digest_size)

    _blake2_hash_cache[algo] = factory
    return factory()
def _new_hashlib_hash(algo):
    """Initialize a digest object from hashlib.

    Handle the swh-specific names for the blake2-related algorithms: any
    name starting with ``blake2`` is delegated to ``_new_blake2_hash``; every
    other name is passed to ``hashlib.new`` unchanged.

    The prefix check ignores case, matching ``_new_blake2_hash``, which
    lowercases the name before resolving it.

    Args:
        algo (str): name of the hash algorithm

    Returns:
        a freshly initialized hash object
    """
    if algo.lower().startswith('blake2'):
        return _new_blake2_hash(algo)
    return hashlib.new(algo)
def _new_git_hash(base_algo, git_type, length): def _new_git_hash(base_algo, git_type, length):
...@@ -75,7 +121,7 @@ def _new_git_hash(base_algo, git_type, length): ...@@ -75,7 +121,7 @@ def _new_git_hash(base_algo, git_type, length):
a hashutil.hash object a hashutil.hash object
""" """
h = hashlib.new(base_algo) h = _new_hashlib_hash(base_algo)
git_header = '%s %d\0' % (git_type, length) git_header = '%s %d\0' % (git_type, length)
h.update(git_header.encode('ascii')) h.update(git_header.encode('ascii'))
...@@ -113,7 +159,7 @@ def _new_hash(algo, length=None): ...@@ -113,7 +159,7 @@ def _new_hash(algo, length=None):
base_algo = algo[:-4] base_algo = algo[:-4]
return _new_git_hash(base_algo, 'blob', length) return _new_git_hash(base_algo, 'blob', length)
return hashlib.new(algo) return _new_hashlib_hash(algo)
def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None): def hash_file(fobj, length=None, algorithms=DEFAULT_ALGORITHMS, chunk_cb=None):
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
# License: GNU General Public License version 3, or any later version # License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information # See top-level LICENSE file for more information
import hashlib
import io import io
import os import os
import tempfile import tempfile
...@@ -16,6 +17,9 @@ from swh.model import hashutil ...@@ -16,6 +17,9 @@ from swh.model import hashutil
class Hashutil(unittest.TestCase): class Hashutil(unittest.TestCase):
def setUp(self): def setUp(self):
# Reset function cache
hashutil._blake2_hash_cache = {}
self.data = b'1984\n' self.data = b'1984\n'
self.hex_checksums = { self.hex_checksums = {
'sha1': '62be35bf00ff0c624f4a621e2ea5595a049e0731', 'sha1': '62be35bf00ff0c624f4a621e2ea5595a049e0731',
...@@ -150,25 +154,103 @@ class Hashutil(unittest.TestCase): ...@@ -150,25 +154,103 @@ class Hashutil(unittest.TestCase):
'expected one of blake2b512, blake2s256, ' 'expected one of blake2b512, blake2s256, '
'sha1, sha1_git, sha256') 'sha1, sha1_git, sha256')
@patch('hashlib.new')
@istest
def new_hash_blake2b_blake2b512_builtin(self, mock_hashlib_new):
    """_new_hash('blake2b512') goes through hashlib.new when hashlib lists it.

    Skipped when 'blake2b512' is not in hashlib.algorithms_available.
    """
    algo = 'blake2b512'
    if algo not in hashlib.algorithms_available:
        self.skipTest('blake2b512 not built-in')

    marker = object()
    mock_hashlib_new.return_value = marker

    result = hashutil._new_hash(algo)

    # The object handed back must be exactly what hashlib.new produced.
    self.assertIs(result, marker)
    mock_hashlib_new.assert_called_with(algo)
@patch('hashlib.new')
@istest
def new_hash_blake2s_blake2s256_builtin(self, mock_hashlib_new):
    """_new_hash('blake2s256') goes through hashlib.new when hashlib lists it.

    Skipped when 'blake2s256' is not in hashlib.algorithms_available.
    """
    algo = 'blake2s256'
    if algo not in hashlib.algorithms_available:
        self.skipTest('blake2s256 not built-in')

    marker = object()
    mock_hashlib_new.return_value = marker

    result = hashutil._new_hash(algo)

    # The object handed back must be exactly what hashlib.new produced.
    self.assertIs(result, marker)
    mock_hashlib_new.assert_called_with(algo)
@istest
def new_hash_blake2b_builtin(self):
    """_new_hash('blake2b512') uses hashlib.blake2b when 'blake2b512' is absent.

    'blake2b512' is temporarily taken out of hashlib.algorithms_available
    (and put back afterwards) so that the lookup falls through to the
    hashlib.blake2b constructor. Skipped when 'blake2b' is not listed.
    """
    available = hashlib.algorithms_available
    hidden = 'blake2b512' in available
    try:
        if hidden:
            available.remove('blake2b512')

        if 'blake2b' not in available:
            self.skipTest('blake2b not built in')

        with patch('hashlib.blake2b') as mock_blake2b:
            marker = object()
            mock_blake2b.return_value = marker

            result = hashutil._new_hash('blake2b512')

            self.assertIs(result, marker)
            # 512 bits requested, constructor takes bytes
            mock_blake2b.assert_called_with(digest_size=512//8)
    finally:
        # Restore the global set even when the test was skipped or failed.
        if hidden:
            available.add('blake2b512')
@istest
def new_hash_blake2s_builtin(self):
    """_new_hash('blake2s256') uses hashlib.blake2s when 'blake2s256' is absent.

    'blake2s256' is temporarily taken out of hashlib.algorithms_available
    (and put back afterwards) so that the lookup falls through to the
    hashlib.blake2s constructor. Skipped when 'blake2s' is not listed.
    """
    available = hashlib.algorithms_available
    hidden = 'blake2s256' in available
    try:
        if hidden:
            available.remove('blake2s256')

        if 'blake2s' not in available:
            self.skipTest('blake2s not built in')

        with patch('hashlib.blake2s') as mock_blake2s:
            marker = object()
            mock_blake2s.return_value = marker

            result = hashutil._new_hash('blake2s256')

            self.assertIs(result, marker)
            # 256 bits requested, constructor takes bytes
            mock_blake2s.assert_called_with(digest_size=256//8)
    finally:
        # Restore the global set even when the test was skipped or failed.
        if hidden:
            available.add('blake2s256')
@istest
def new_hash_blake2b_pyblake2(self):
    """_new_hash('blake2b512') falls back to pyblake2.blake2b.

    Only runs when hashlib lists neither 'blake2b512' nor 'blake2b'.
    """
    skip_reasons = (
        ('blake2b512', 'blake2b512 built in'),
        ('blake2b', 'blake2b built in'),
    )
    for name, reason in skip_reasons:
        if name in hashlib.algorithms_available:
            self.skipTest(reason)

    with patch('pyblake2.blake2b') as mock_blake2b:
        marker = object()
        mock_blake2b.return_value = marker

        result = hashutil._new_hash('blake2b512')

        self.assertIs(result, marker)
        # 512 bits requested, constructor takes bytes
        mock_blake2b.assert_called_with(digest_size=512//8)
@istest
def new_hash_blake2s_pyblake2(self):
    """_new_hash('blake2s256') falls back to pyblake2.blake2s.

    Only runs when hashlib lists neither 'blake2s256' nor 'blake2s'.
    """
    skip_reasons = (
        ('blake2s256', 'blake2s256 built in'),
        ('blake2s', 'blake2s built in'),
    )
    for name, reason in skip_reasons:
        if name in hashlib.algorithms_available:
            self.skipTest(reason)

    with patch('pyblake2.blake2s') as mock_blake2s:
        marker = object()
        mock_blake2s.return_value = marker

        result = hashutil._new_hash('blake2s256')

        self.assertIs(result, marker)
        # 256 bits requested, constructor takes bytes
        mock_blake2s.assert_called_with(digest_size=256//8)
class HashlibGit(unittest.TestCase): class HashlibGit(unittest.TestCase):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment