diff --git a/swh/model/fields/__init__.py b/swh/model/fields/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b09b0561e7cec6c0a78d5520a5db9cbf76fca35f
--- /dev/null
+++ b/swh/model/fields/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2015 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+# We do our imports here but we don't use them, so flake8 complains
+# flake8: noqa
+
+from .simple import (validate_type, validate_int, validate_str,
+                     validate_datetime, validate_enum)
+from .hashes import (validate_sha1, validate_sha1_git, validate_sha256)
+from .compound import (validate_against_schema, validate_all_keys,
+                       validate_any_key)
diff --git a/swh/model/fields/compound.py b/swh/model/fields/compound.py
new file mode 100644
index 0000000000000000000000000000000000000000..f73117e8e71cc7c9f24078a9162575311c8f53c9
--- /dev/null
+++ b/swh/model/fields/compound.py
@@ -0,0 +1,125 @@
+# Copyright (C) 2015 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from collections import defaultdict
+import itertools
+
+from ..exceptions import ValidationError, NON_FIELD_ERRORS
+
+
+def validate_against_schema(model, schema, value):
+    """Validate a value for the given model against the given schema.
+
+    Args:
+        model: the name of the model
+        schema: the schema to validate against
+        value: the value to validate
+
+    Returns:
+        True if the value is correct against the schema
+
+    Raises:
+        ValidationError if the value does not validate against the schema
+    """
+
+    if not isinstance(value, dict):
+        raise ValidationError(
+            'Unexpected type %(type)s for swh object, expected dict',
+            params={
+                'type': value.__class__.__name__
+            },
+            code='swh-unexpected-type',
+        )
+
+    errors = defaultdict(list)
+
+    for key, (mandatory, validators) in itertools.chain(
+        ((k, v) for k, v in schema.items() if k != NON_FIELD_ERRORS),
+        [(NON_FIELD_ERRORS, (False, schema.get(NON_FIELD_ERRORS, [])))]
+    ):
+        if not validators:
+            continue
+
+        if not isinstance(validators, list):
+            validators = [validators]
+
+        validated_value = value
+        if key != NON_FIELD_ERRORS:
+            try:
+                validated_value = value[key]
+            except KeyError:
+                if mandatory:
+                    errors[key].append(
+                        ValidationError(
+                            'Field %(field)s is mandatory',
+                            params={'field': key},
+                            code='swh-field-mandatory',
+                        )
+                    )
+
+                continue
+        else:
+            if errors:
+                # Don't validate the whole object if some fields are broken
+                continue
+
+        for validator in validators:
+            try:
+                valid = validator(validated_value)
+            except ValidationError as e:
+                errors[key].append(e)
+            else:
+                if not valid:
+                    errdata = {
+                        'validator': validator.__name__,
+                    }
+
+                    if key == NON_FIELD_ERRORS:
+                        errmsg = 'Validation of model %(model)s failed in ' \
+                                 '%(validator)s'
+                        errdata['model'] = model
+                        errcode = 'model-validation-failed'
+                    else:
+                        errmsg = 'Validation of field %(field)s failed in ' \
+                                 '%(validator)s'
+                        errdata['field'] = key
+                        errcode = 'field-validation-failed'
+
+                    errors[key].append(
+                        ValidationError(errmsg, params=errdata, code=errcode)
+                    )
+
+    if errors:
+        raise ValidationError(dict(errors))
+
+    return True
+
+
+def validate_all_keys(value, keys):
+    """Validate that all the given keys are present in value"""
+    missing_keys = set(keys) - set(value)
+    if missing_keys:
+        missing_fields = ', '.join(sorted(missing_keys))
+        raise ValidationError(
+            'Missing mandatory fields %(missing_fields)s',
+            params={'missing_fields': missing_fields},
+            code='missing-mandatory-field'
+        )
+
+    return True
+
+
+def validate_any_key(value, keys):
+    """Validate that any of the given keys is present in value"""
+    present_keys = set(keys) & set(value)
+    if not present_keys:
+        missing_fields = ', '.join(sorted(keys))
+        raise ValidationError(
+            'Must contain one of the alternative fields %(missing_fields)s',
+            params={'missing_fields': missing_fields},
+            code='missing-alternative-field',
+        )
+
+    return True
diff --git a/swh/model/fields/hashes.py b/swh/model/fields/hashes.py
new file mode 100644
index 0000000000000000000000000000000000000000..3819565d841052e2f3c166b07d85be2c79043398
--- /dev/null
+++ b/swh/model/fields/hashes.py
@@ -0,0 +1,117 @@
+# Copyright (C) 2015 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import string
+from ..exceptions import ValidationError
+
+
+def validate_hash(value, hash_type):
+    """Validate that the given value represents a hash of the given hash_type.
+
+    Args:
+        value: the value to check
+        hash_type: the type of hash the value is representing
+
+    Returns:
+        True if the hash validates
+
+    Raises:
+        ValidationError if the hash does not validate
+    """
+
+    hash_lengths = {
+        'sha1': 20,
+        'sha1_git': 20,
+        'sha256': 32,
+    }
+
+    hex_digits = set(string.hexdigits)
+
+    if hash_type not in hash_lengths:
+        raise ValidationError(
+            'Unexpected hash type %(hash_type)s, expected one of'
+            ' %(hash_types)s',
+            params={
+                'hash_type': hash_type,
+                'hash_types': ', '.join(sorted(hash_lengths)),
+            },
+            code='unexpected-hash-type')
+
+    if isinstance(value, str):
+        errors = []
+        extra_chars = set(value) - hex_digits
+        if extra_chars:
+            errors.append(
+                ValidationError(
+                    "Unexpected characters `%(unexpected_chars)s' for hash "
+                    "type %(hash_type)s",
+                    params={
+                        'unexpected_chars': ', '.join(sorted(extra_chars)),
+                        'hash_type': hash_type,
+                    },
+                    code='unexpected-hash-contents',
+                )
+            )
+
+        length = len(value)
+        expected_length = 2 * hash_lengths[hash_type]
+        if length != expected_length:
+            errors.append(
+                ValidationError(
+                    'Unexpected length %(length)d for hash type '
+                    '%(hash_type)s, expected %(expected_length)d',
+                    params={
+                        'length': length,
+                        'expected_length': expected_length,
+                        'hash_type': hash_type,
+                    },
+                    code='unexpected-hash-length',
+                )
+            )
+
+        if errors:
+            raise ValidationError(errors)
+
+        return True
+
+    if isinstance(value, bytes):
+        length = len(value)
+        expected_length = hash_lengths[hash_type]
+        if length != expected_length:
+            raise ValidationError(
+                'Unexpected length %(length)d for hash type '
+                '%(hash_type)s, expected %(expected_length)d',
+                params={
+                    'length': length,
+                    'expected_length': expected_length,
+                    'hash_type': hash_type,
+                },
+                code='unexpected-hash-length',
+            )
+
+        return True
+
+    raise ValidationError(
+        'Unexpected type %(type)s for hash, expected str or bytes',
+        params={
+            'type': value.__class__.__name__,
+        },
+        code='unexpected-hash-value-type',
+    )
+
+
+def validate_sha1(sha1):
+    """Validate that sha1 is a valid sha1 hash"""
+    return validate_hash(sha1, 'sha1')
+
+
+def validate_sha1_git(sha1_git):
+    """Validate that sha1_git is a valid sha1_git hash"""
+    return validate_hash(sha1_git, 'sha1_git')
+
+
+def validate_sha256(sha256):
+    """Validate that sha256 is a valid sha256 hash"""
+    return validate_hash(sha256, 'sha256')
diff --git a/swh/model/fields/simple.py b/swh/model/fields/simple.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1bff95b8689cafbe311af6c93ddaaa39f08e46e
--- /dev/null
+++ b/swh/model/fields/simple.py
@@ -0,0 +1,71 @@
+
+import datetime
+import numbers
+
+from ..exceptions import ValidationError
+
+
+def validate_type(value, type):
+    """Validate that value is an instance of the given type(s)"""
+    if not isinstance(value, type):
+        if isinstance(type, tuple):
+            typestr = 'one of %s' % ', '.join(typ.__name__ for typ in type)
+        else:
+            typestr = type.__name__
+        raise ValidationError(
+            'Unexpected type %(type)s, expected %(expected_type)s',
+            params={
+                'type': value.__class__.__name__,
+                'expected_type': typestr,
+            },
+            code='unexpected-type'
+        )
+
+    return True
+
+
+def validate_int(value):
+    """Validate that the given value is an int"""
+    return validate_type(value, numbers.Integral)
+
+
+def validate_str(value):
+    """Validate that the given value is a string"""
+    return validate_type(value, str)
+
+
+def validate_datetime(value):
+    """Validate that the given value is either a datetime, or a numeric number
+    of seconds since the UNIX epoch."""
+
+    errors = []
+    try:
+        validate_type(value, (datetime.datetime, numbers.Real))
+    except ValidationError as e:
+        errors.append(e)
+
+    if isinstance(value, datetime.datetime) and value.tzinfo is None:
+        errors.append(ValidationError(
+            'Datetimes must be timezone-aware in swh',
+            code='datetime-without-tzinfo',
+        ))
+
+    if errors:
+        raise ValidationError(errors)
+
+    return True
+
+
+def validate_enum(value, expected_values):
+    """Validate that value is contained in expected_values"""
+    if value not in expected_values:
+        raise ValidationError(
+            'Unexpected value %(value)s, expected one of %(expected_values)s',
+            params={
+                'value': value,
+                'expected_values': ', '.join(sorted(expected_values)),
+            },
+            code='unexpected-value',
+        )
+
+    return True
diff --git a/swh/model/tests/__init__.py b/swh/model/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/swh/model/tests/fields/__init__.py b/swh/model/tests/fields/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/swh/model/tests/test_validators.py b/swh/model/tests/fields/test_hashes.py
similarity index 80%
rename from swh/model/tests/test_validators.py
rename to swh/model/tests/fields/test_hashes.py
index e6b4be711f82bd5d4f1dc5de710f6c2fe604e206..69a739f29d5f617addf127715cfac2fc3f17cfa8 100644
--- a/swh/model/tests/test_validators.py
+++ b/swh/model/tests/fields/test_hashes.py
@@ -7,7 +7,8 @@ import unittest
 
 from nose.tools import istest
 
-from swh.model import validators, exceptions
+from swh.model.exceptions import ValidationError
+from swh.model.fields import hashes
 
 
 class ValidateHash(unittest.TestCase):
@@ -34,19 +35,18 @@ class ValidateHash(unittest.TestCase):
     @istest
     def valid_bytes_hash(self):
         for hash_type, value in self.valid_byte_hashes.items():
-            self.assertTrue(validators.validate_hash(value, hash_type))
+            self.assertTrue(hashes.validate_hash(value, hash_type))
 
     @istest
     def valid_str_hash(self):
         for hash_type, value in self.valid_str_hashes.items():
-            self.assertTrue(validators.validate_hash(value, hash_type))
+            self.assertTrue(hashes.validate_hash(value, hash_type))
 
     @istest
     def invalid_hash_type(self):
         hash_type = 'unknown_hash_type'
-        with self.assertRaises(exceptions.ValidationError) as cm:
-            validators.validate_hash(self.valid_str_hashes['sha1'],
-                                     hash_type)
+        with self.assertRaises(ValidationError) as cm:
+            hashes.validate_hash(self.valid_str_hashes['sha1'], hash_type)
 
         exc = cm.exception
         self.assertEqual(exc.code, 'unexpected-hash-type')
@@ -59,8 +59,8 @@ class ValidateHash(unittest.TestCase):
     def invalid_bytes_len(self):
         for hash_type, value in self.valid_byte_hashes.items():
             value = value + b'\x00\x01'
-            with self.assertRaises(exceptions.ValidationError) as cm:
-                validators.validate_hash(value, hash_type)
+            with self.assertRaises(ValidationError) as cm:
+                hashes.validate_hash(value, hash_type)
 
             exc = cm.exception
             self.assertEqual(exc.code, 'unexpected-hash-length')
@@ -74,8 +74,8 @@ class ValidateHash(unittest.TestCase):
     def invalid_str_len(self):
         for hash_type, value in self.valid_str_hashes.items():
             value = value + '0001'
-            with self.assertRaises(exceptions.ValidationError) as cm:
-                validators.validate_hash(value, hash_type)
+            with self.assertRaises(ValidationError) as cm:
+                hashes.validate_hash(value, hash_type)
 
             exc = cm.exception
             self.assertEqual(exc.code, 'unexpected-hash-length')
@@ -89,8 +89,8 @@ class ValidateHash(unittest.TestCase):
     def invalid_str_contents(self):
         for hash_type, value in self.valid_str_hashes.items():
             value = '\xa2' + value[1:-1] + '\xc3'
-            with self.assertRaises(exceptions.ValidationError) as cm:
-                validators.validate_hash(value, hash_type)
+            with self.assertRaises(ValidationError) as cm:
+                hashes.validate_hash(value, hash_type)
 
             exc = cm.exception
             self.assertEqual(exc.code, 'unexpected-hash-contents')
@@ -103,8 +103,8 @@ class ValidateHash(unittest.TestCase):
 
     @istest
     def invalid_value_type(self):
-        with self.assertRaises(exceptions.ValidationError) as cm:
-            validators.validate_hash(self.bad_hash, 'sha1')
+        with self.assertRaises(ValidationError) as cm:
+            hashes.validate_hash(self.bad_hash, 'sha1')
 
         exc = cm.exception
         self.assertEqual(exc.code, 'unexpected-hash-value-type')
diff --git a/swh/model/validators.py b/swh/model/validators.py
index b3f426bded147acf959ece2387abe817caa831e6..f948ab1e13401f4bf467b8d41e3a7e511f20d045 100644
--- a/swh/model/validators.py
+++ b/swh/model/validators.py
@@ -3,101 +3,52 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-import string
+from .exceptions import ValidationError, NON_FIELD_ERRORS
+from . import fields
 
-from .exceptions import ValidationError
 
+def validate_content(content):
+    """Validate that a content has the correct schema.
 
-def validate_hash(value, hash_type):
-    """Validate that the given value represents a hash of the given hash_type.
+    Args: a content (dictionary) to validate."""
 
-    Args:
-        value: the value to check
-        hash_type: the type of hash the value is representing
+    def validate_content_status(status):
+        return fields.validate_enum(status, {'absent', 'visible', 'hidden'})
 
-    Returns:
-        True if the hash validates
-
-    Raises:
-        ValueError if the hash does not validate
-    """
-
-    hash_lengths = {
-        'sha1': 20,
-        'sha1_git': 20,
-        'sha256': 32,
-    }
-
-    hex_digits = set(string.hexdigits)
-
-    if hash_type not in hash_lengths:
-        raise ValidationError(
-            'Unexpected hash type %(hash_type)s, expected one of'
-            ' %(hash_types)s',
-            params={
-                'hash_type': hash_type,
-                'hash_types': ', '.join(sorted(hash_lengths)),
-            },
-            code='unexpected-hash-type')
-
-    if isinstance(value, str):
+    def validate_keys(content):
+        hashes = {'sha1', 'sha1_git', 'sha256'}
         errors = []
-        extra_chars = set(value) - hex_digits
-        if extra_chars:
-            errors.append(
-                ValidationError(
-                    "Unexpected characters `%(unexpected_chars)s' for hash "
-                    "type %(hash_type)s",
-                    params={
-                        'unexpected_chars': ', '.join(sorted(extra_chars)),
-                        'hash_type': hash_type,
-                    },
-                    code='unexpected-hash-contents',
-                )
-            )
 
-        length = len(value)
-        expected_length = 2 * hash_lengths[hash_type]
-        if length != expected_length:
-            errors.append(
-                ValidationError(
-                    'Unexpected length %(length)d for hash type '
-                    '%(hash_type)s, expected %(expected_length)d',
-                    params={
-                        'length': length,
-                        'expected_length': expected_length,
-                        'hash_type': hash_type,
-                    },
-                    code='unexpected-hash-length',
-                )
-            )
+        if content['status'] == 'absent':
+            try:
+                fields.validate_all_keys(content, {'reason', 'origin'})
+            except ValidationError as e:
+                errors.append(e)
+            try:
+                fields.validate_any_key(content, hashes)
+            except ValidationError as e:
+                errors.append(e)
+        else:
+            try:
+                fields.validate_all_keys(content, hashes)
+            except ValidationError as e:
+                errors.append(e)
 
         if errors:
             raise ValidationError(errors)
 
-        return True
-
-    if isinstance(value, bytes):
-        length = len(value)
-        expected_length = hash_lengths[hash_type]
-        if length != expected_length:
-            raise ValidationError(
-                'Unexpected length %(length)d for hash type '
-                '%(hash_type)s, expected %(expected_length)d',
-                params={
-                    'length': length,
-                    'expected_length': expected_length,
-                    'hash_type': hash_type,
-                },
-                code='unexpected-hash-length',
-            )
-
-        return True
+        return True
+
+    content_schema = {
+        'sha1': (False, fields.validate_sha1),
+        'sha1_git': (False, fields.validate_sha1_git),
+        'sha256': (False, fields.validate_sha256),
+        'status': (True, validate_content_status),
+        'length': (True, fields.validate_int),
+        'ctime': (True, fields.validate_datetime),
+        'reason': (False, fields.validate_str),
+        'origin': (False, fields.validate_int),
+        NON_FIELD_ERRORS: validate_keys,
+    }
 
-    raise ValidationError(
-        'Unexpected type %(type)s for hash, expected str or bytes',
-        params={
-            'type': value.__class__.__name__,
-        },
-        code='unexpected-hash-value-type',
-    )
+    return fields.validate_against_schema('content', content_schema, content)