diff --git a/swh/model/exceptions.py b/swh/model/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..147c5ad8d45990f506866c4bd9f68546f663498e --- /dev/null +++ b/swh/model/exceptions.py @@ -0,0 +1,132 @@ +# Copyright (C) 2015 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# +# The ValidationError code derives from Django, and is available under the +# following license terms: +# +# Copyright (c) Django Software Foundation and individual contributors. All +# rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of Django nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

# Key under which update_error_dict() files errors that are not attached to
# a specific field.
NON_FIELD_ERRORS = '__all__'


class ValidationError(Exception):
    """An error while validating data.

    Depending on the constructor argument, an instance takes one of three
    shapes:

    * single error: ``message``, ``code`` and ``params`` are set, and
      ``error_list`` is ``[self]``;
    * list of errors: only ``error_list`` is set, as a flat list of
      single-error instances;
    * dictionary of errors: only ``error_dict`` is set, mapping each field
      name to a list of single-error instances.

    The methods below tell the shapes apart with ``hasattr``.
    """

    def __init__(self, message, code=None, params=None):
        """
        The `message` argument can be a single error, a list of errors, or a
        dictionary that maps field names to lists of errors. What we define as
        an "error" can be either a simple string or an instance of
        ValidationError with its message attribute set, and what we define as
        list or dictionary can be an actual `list` or `dict` or an instance
        of ValidationError with its `error_list` or `error_dict` attribute set.

        Args:
            message: the error(s), in one of the forms described above
            code: optional machine-readable identifier of a single error
            params: optional mapping interpolated into the message with the
                `%` operator when the error is rendered
        """

        super().__init__(message, code, params)

        # A one-element list is handled exactly like its only element.
        if isinstance(message, list) and len(message) == 1:
            message = message[0]

        # Unwrap an existing ValidationError into the plain value it holds.
        if isinstance(message, ValidationError):
            if hasattr(message, 'error_dict'):
                message = message.error_dict
            elif not hasattr(message, 'message'):
                # Only single-error instances carry a `message` attribute,
                # so this one was built from a list.
                message = message.error_list
            else:
                message, code, params = (message.message, message.code,
                                         message.params)

        if isinstance(message, dict):
            self.error_dict = {}
            for field, messages in message.items():
                if not isinstance(messages, ValidationError):
                    messages = ValidationError(messages)
                self.error_dict[field] = messages.error_list

        elif isinstance(message, list):
            self.error_list = []
            for item in message:
                # Normalize plain strings to instances of ValidationError.
                if not isinstance(item, ValidationError):
                    item = ValidationError(item)
                if hasattr(item, 'error_dict'):
                    # Flatten the per-field lists in place rather than
                    # concatenating them with sum(..., []).
                    for field_errors in item.error_dict.values():
                        self.error_list.extend(field_errors)
                else:
                    self.error_list.extend(item.error_list)

        else:
            self.message = message
            self.code = code
            self.params = params
            self.error_list = [self]

    @property
    def message_dict(self):
        """Map each field name to its list of rendered messages.

        Raises:
            AttributeError if this instance was not built from a dictionary
        """
        # Trigger an AttributeError if this ValidationError
        # doesn't have an error_dict.
        getattr(self, 'error_dict')

        return dict(self)

    @property
    def messages(self):
        """Flat list of all rendered messages held by this error."""
        if hasattr(self, 'error_dict'):
            return [
                rendered
                for field_messages in dict(self).values()
                for rendered in field_messages
            ]
        return list(self)

    def update_error_dict(self, error_dict):
        """Merge the errors of this instance into `error_dict`.

        Per-field errors are appended under their field name; errors without
        a field are appended under NON_FIELD_ERRORS.

        Args:
            error_dict: dictionary mapping keys to lists, updated in place

        Returns:
            the same `error_dict` object
        """
        if hasattr(self, 'error_dict'):
            for field, error_list in self.error_dict.items():
                error_dict.setdefault(field, []).extend(error_list)
        else:
            error_dict.setdefault(NON_FIELD_ERRORS, []).extend(self.error_list)
        return error_dict

    def __iter__(self):
        """Yield `(field, messages)` pairs for a dictionary error, rendered
        message strings otherwise."""
        if hasattr(self, 'error_dict'):
            for field, errors in self.error_dict.items():
                yield field, list(ValidationError(errors))
        else:
            for error in self.error_list:
                message = error.message
                # Interpolate only when there are params to apply.
                if error.params:
                    message %= error.params
                yield message

    def __str__(self):
        if hasattr(self, 'error_dict'):
            return repr(dict(self))
        return repr(list(self))

    def __repr__(self):
        return 'ValidationError(%s)' % self
class ValidateHash(unittest.TestCase):
    # Checks validators.validate_hash against well-formed hashes (raw bytes
    # and hexadecimal strings) and against each kind of malformed input.

    def setUp(self):
        # Raw digests, keyed by hash type.
        byte_hashes = {}
        byte_hashes['sha1'] = (
            b'\xf1\xd2\xd2\xf9\x24\xe9\x86\xac\x86\xfd\xf7\xb3\x6c\x94'
            b'\xbc\xdf\x32\xbe\xec\x15'
        )
        byte_hashes['sha1_git'] = (
            b'\x25\x7c\xc5\x64\x2c\xb1\xa0\x54\xf0\x8c\xc8\x3f\x2d'
            b'\x94\x3e\x56\xfd\x3e\xbe\x99'
        )
        byte_hashes['sha256'] = (
            b'\xb5\xbb\x9d\x80\x14\xa0\xf9\xb1\xd6\x1e\x21\xe7\x96'
            b'\xd7\x8d\xcc\xdf\x13\x52\xf2\x3c\xd3\x28\x12\xf4\x85'
            b'\x0b\x87\x8a\xe4\x94\x4c'
        )
        self.valid_byte_hashes = byte_hashes

        # The same digests written as hexadecimal strings.
        str_hashes = {}
        str_hashes['sha1'] = 'f1d2d2f924e986ac86fdf7b36c94bcdf32beec15'
        str_hashes['sha1_git'] = '257cc5642cb1a054f08cc83f2d943e56fd3ebe99'
        str_hashes['sha256'] = (
            'b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f485'
            '0b878ae4944c'
        )
        self.valid_str_hashes = str_hashes

        # A value that is neither str nor bytes.
        self.bad_hash = object()

    @istest
    def valid_bytes_hash(self):
        for algo, digest in self.valid_byte_hashes.items():
            outcome = validators.validate_hash(digest, algo)
            self.assertTrue(outcome)

    @istest
    def valid_str_hash(self):
        for algo, hexdigest in self.valid_str_hashes.items():
            outcome = validators.validate_hash(hexdigest, algo)
            self.assertTrue(outcome)

    @istest
    def invalid_hash_type(self):
        bogus_type = 'unknown_hash_type'
        sha1_hex = self.valid_str_hashes['sha1']
        with self.assertRaises(exceptions.ValidationError) as ctx:
            validators.validate_hash(sha1_hex, bogus_type)

        err = ctx.exception
        self.assertEqual(err.code, 'unexpected-hash-type')
        self.assertEqual(err.params['hash_type'], bogus_type)

        rendered = str(err)
        self.assertIn('Unexpected hash type', rendered)
        self.assertIn(bogus_type, rendered)

    @istest
    def invalid_bytes_len(self):
        for algo, digest in self.valid_byte_hashes.items():
            # Two extra bytes make the digest too long for its type.
            too_long = digest + b'\x00\x01'
            with self.assertRaises(exceptions.ValidationError) as ctx:
                validators.validate_hash(too_long, algo)

            err = ctx.exception
            self.assertEqual(err.code, 'unexpected-hash-length')
            self.assertEqual(err.params['hash_type'], algo)
            self.assertEqual(err.params['length'], len(too_long))

            rendered = str(err)
            self.assertIn('Unexpected length', rendered)
            self.assertIn(str(len(too_long)), rendered)

    @istest
    def invalid_str_len(self):
        for algo, hexdigest in self.valid_str_hashes.items():
            # Four extra hex digits make the string too long for its type.
            too_long = hexdigest + '0001'
            with self.assertRaises(exceptions.ValidationError) as ctx:
                validators.validate_hash(too_long, algo)

            err = ctx.exception
            self.assertEqual(err.code, 'unexpected-hash-length')
            self.assertEqual(err.params['hash_type'], algo)
            self.assertEqual(err.params['length'], len(too_long))

            rendered = str(err)
            self.assertIn('Unexpected length', rendered)
            self.assertIn(str(len(too_long)), rendered)

    @istest
    def invalid_str_contents(self):
        for algo, hexdigest in self.valid_str_hashes.items():
            # Swap the first and last characters for non-hex ones; the
            # length stays the same.
            tainted = ''.join(('\xa2', hexdigest[1:-1], '\xc3'))
            with self.assertRaises(exceptions.ValidationError) as ctx:
                validators.validate_hash(tainted, algo)

            err = ctx.exception
            self.assertEqual(err.code, 'unexpected-hash-contents')
            self.assertEqual(err.params['hash_type'], algo)
            self.assertEqual(err.params['unexpected_chars'], '\xa2, \xc3')

            rendered = str(err)
            self.assertIn('Unexpected characters', rendered)
            self.assertIn('\xc3', rendered)
            self.assertIn('\xa2', rendered)

    @istest
    def invalid_value_type(self):
        with self.assertRaises(exceptions.ValidationError) as ctx:
            validators.validate_hash(self.bad_hash, 'sha1')

        err = ctx.exception
        type_name = self.bad_hash.__class__.__name__
        self.assertEqual(err.code, 'unexpected-hash-value-type')
        self.assertEqual(err.params['type'], type_name)

        rendered = str(err)
        self.assertIn('Unexpected type', rendered)
        self.assertIn(type_name, rendered)
def _unexpected_length_error(length, expected_length, hash_type):
    """Build the ValidationError reporting a hash of the wrong length."""
    return ValidationError(
        'Unexpected length %(length)d for hash type '
        '%(hash_type)s, expected %(expected_length)d',
        params={
            'length': length,
            'expected_length': expected_length,
            'hash_type': hash_type,
        },
        code='unexpected-hash-length',
    )


def validate_hash(value, hash_type):
    """Validate that the given value represents a hash of the given hash_type.

    Args:
        value: the value to check, either a `str` of hexadecimal digits or
            the raw `bytes` of the hash
        hash_type: the type of hash the value is representing (one of
            'sha1', 'sha1_git', 'sha256')

    Returns:
        True if the hash validates

    Raises:
        ValidationError if the hash does not validate; its `code` is one of
        'unexpected-hash-type', 'unexpected-hash-contents',
        'unexpected-hash-length' or 'unexpected-hash-value-type'. A `str`
        value failing both the contents and the length checks raises a
        single ValidationError wrapping both errors.
    """

    # Length of each supported hash, in bytes.
    hash_lengths = {
        'sha1': 20,
        'sha1_git': 20,
        'sha256': 32,
    }

    hex_digits = set(string.hexdigits)

    if hash_type not in hash_lengths:
        raise ValidationError(
            'Unexpected hash type %(hash_type)s, expected one of'
            ' %(hash_types)s',
            params={
                'hash_type': hash_type,
                'hash_types': ', '.join(sorted(hash_lengths)),
            },
            code='unexpected-hash-type')

    if isinstance(value, str):
        # Collect every problem so the caller sees them all at once.
        errors = []
        extra_chars = set(value) - hex_digits
        if extra_chars:
            errors.append(
                ValidationError(
                    "Unexpected characters `%(unexpected_chars)s' for hash "
                    "type %(hash_type)s",
                    params={
                        'unexpected_chars': ', '.join(sorted(extra_chars)),
                        'hash_type': hash_type,
                    },
                    code='unexpected-hash-contents',
                )
            )

        length = len(value)
        # Two hexadecimal digits encode one byte.
        expected_length = 2 * hash_lengths[hash_type]
        if length != expected_length:
            errors.append(
                _unexpected_length_error(length, expected_length, hash_type))

        if errors:
            raise ValidationError(errors)

        return True

    if isinstance(value, bytes):
        length = len(value)
        expected_length = hash_lengths[hash_type]
        if length != expected_length:
            raise _unexpected_length_error(length, expected_length, hash_type)

        return True

    raise ValidationError(
        'Unexpected type %(type)s for hash, expected str or bytes',
        params={
            'type': value.__class__.__name__,
        },
        code='unexpected-hash-value-type',
    )