Compare revisions
Showing with 6843 additions and 1730 deletions
File added
@@ -18,12 +18,12 @@ class ValidateCompound(unittest.TestCase):
def validate_never(model):
return False
self.test_model = 'test model'
self.test_model = "test model"
self.test_schema = {
'int': (True, simple.validate_int),
'str': (True, simple.validate_str),
'str2': (True, simple.validate_str),
'datetime': (False, simple.validate_datetime),
"int": (True, simple.validate_int),
"str": (True, simple.validate_str),
"str2": (True, simple.validate_str),
"datetime": (False, simple.validate_datetime),
NON_FIELD_ERRORS: validate_always,
}
@@ -31,43 +31,48 @@ class ValidateCompound(unittest.TestCase):
self.test_schema_shortcut[NON_FIELD_ERRORS] = validate_never
self.test_schema_field_failed = self.test_schema.copy()
self.test_schema_field_failed['int'] = (True, [simple.validate_int,
validate_never])
self.test_schema_field_failed["int"] = (
True,
[simple.validate_int, validate_never],
)
self.test_value = {
'str': 'value1',
'str2': 'value2',
'int': 42,
'datetime': datetime.datetime(1990, 1, 1, 12, 0, 0,
tzinfo=datetime.timezone.utc),
"str": "value1",
"str2": "value2",
"int": 42,
"datetime": datetime.datetime(
1990, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc
),
}
self.test_value_missing = {
'str': 'value1',
"str": "value1",
}
self.test_value_str_error = {
'str': 1984,
'str2': 'value2',
'int': 42,
'datetime': datetime.datetime(1990, 1, 1, 12, 0, 0,
tzinfo=datetime.timezone.utc),
"str": 1984,
"str2": "value2",
"int": 42,
"datetime": datetime.datetime(
1990, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc
),
}
self.test_value_missing_keys = {'int'}
self.test_value_missing_keys = {"int"}
self.test_value_wrong_type = 42
self.present_keys = set(self.test_value)
self.missing_keys = {'missingkey1', 'missingkey2'}
self.missing_keys = {"missingkey1", "missingkey2"}
def test_validate_any_key(self):
self.assertTrue(
compound.validate_any_key(self.test_value, self.present_keys))
self.assertTrue(compound.validate_any_key(self.test_value, self.present_keys))
self.assertTrue(
compound.validate_any_key(self.test_value,
self.present_keys | self.missing_keys))
compound.validate_any_key(
self.test_value, self.present_keys | self.missing_keys
)
)
def test_validate_any_key_missing(self):
with self.assertRaises(ValidationError) as cm:
@@ -75,13 +80,13 @@ class ValidateCompound(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'missing-alternative-field')
self.assertEqual(exc.params['missing_fields'],
', '.join(sorted(self.missing_keys)))
self.assertEqual(exc.code, "missing-alternative-field")
self.assertEqual(
exc.params["missing_fields"], ", ".join(sorted(self.missing_keys))
)
def test_validate_all_keys(self):
self.assertTrue(
compound.validate_all_keys(self.test_value, self.present_keys))
self.assertTrue(compound.validate_all_keys(self.test_value, self.present_keys))
def test_validate_all_keys_missing(self):
with self.assertRaises(ValidationError) as cm:
@@ -89,41 +94,49 @@ class ValidateCompound(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'missing-mandatory-field')
self.assertEqual(exc.params['missing_fields'],
', '.join(sorted(self.missing_keys)))
self.assertEqual(exc.code, "missing-mandatory-field")
self.assertEqual(
exc.params["missing_fields"], ", ".join(sorted(self.missing_keys))
)
with self.assertRaises(ValidationError) as cm:
compound.validate_all_keys(self.test_value,
self.present_keys | self.missing_keys)
compound.validate_all_keys(
self.test_value, self.present_keys | self.missing_keys
)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'missing-mandatory-field')
self.assertEqual(exc.params['missing_fields'],
', '.join(sorted(self.missing_keys)))
self.assertEqual(exc.code, "missing-mandatory-field")
self.assertEqual(
exc.params["missing_fields"], ", ".join(sorted(self.missing_keys))
)
def test_validate_against_schema(self):
self.assertTrue(
compound.validate_against_schema(self.test_model, self.test_schema,
self.test_value))
compound.validate_against_schema(
self.test_model, self.test_schema, self.test_value
)
)
def test_validate_against_schema_wrong_type(self):
with self.assertRaises(ValidationError) as cm:
compound.validate_against_schema(self.test_model, self.test_schema,
self.test_value_wrong_type)
compound.validate_against_schema(
self.test_model, self.test_schema, self.test_value_wrong_type
)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'model-unexpected-type')
self.assertEqual(exc.params['model'], self.test_model)
self.assertEqual(exc.params['type'],
self.test_value_wrong_type.__class__.__name__)
self.assertEqual(exc.code, "model-unexpected-type")
self.assertEqual(exc.params["model"], self.test_model)
self.assertEqual(
exc.params["type"], self.test_value_wrong_type.__class__.__name__
)
def test_validate_against_schema_mandatory_keys(self):
with self.assertRaises(ValidationError) as cm:
compound.validate_against_schema(self.test_model, self.test_schema,
self.test_value_missing)
compound.validate_against_schema(
self.test_model, self.test_schema, self.test_value_missing
)
# The exception should be of the form:
# ValidationError({
@@ -138,8 +151,8 @@ class ValidateCompound(unittest.TestCase):
self.assertEqual(len(nested_key), 1)
nested = nested_key[0]
self.assertIsInstance(nested, ValidationError)
self.assertEqual(nested.code, 'model-field-mandatory')
self.assertEqual(nested.params['field'], key)
self.assertEqual(nested.code, "model-field-mandatory")
self.assertEqual(nested.params["field"], key)
def test_validate_whole_schema_shortcut_previous_error(self):
with self.assertRaises(ValidationError) as cm:
@@ -176,14 +189,15 @@ class ValidateCompound(unittest.TestCase):
nested = non_field_errors[0]
self.assertIsInstance(nested, ValidationError)
self.assertEqual(nested.code, 'model-validation-failed')
self.assertEqual(nested.params['model'], self.test_model)
self.assertEqual(nested.params['validator'], 'validate_never')
self.assertEqual(nested.code, "model-validation-failed")
self.assertEqual(nested.params["model"], self.test_model)
self.assertEqual(nested.params["validator"], "validate_never")
def test_validate_against_schema_field_error(self):
with self.assertRaises(ValidationError) as cm:
compound.validate_against_schema(self.test_model, self.test_schema,
self.test_value_str_error)
compound.validate_against_schema(
self.test_model, self.test_schema, self.test_value_str_error
)
# The exception should be of the form:
# ValidationError({
@@ -192,21 +206,21 @@ class ValidateCompound(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(set(exc.error_dict.keys()), {'str'})
self.assertEqual(set(exc.error_dict.keys()), {"str"})
str_errors = exc.error_dict['str']
str_errors = exc.error_dict["str"]
self.assertIsInstance(str_errors, list)
self.assertEqual(len(str_errors), 1)
nested = str_errors[0]
self.assertIsInstance(nested, ValidationError)
self.assertEqual(nested.code, 'unexpected-type')
self.assertEqual(nested.code, "unexpected-type")
def test_validate_against_schema_field_failed(self):
with self.assertRaises(ValidationError) as cm:
compound.validate_against_schema(self.test_model,
self.test_schema_field_failed,
self.test_value)
compound.validate_against_schema(
self.test_model, self.test_schema_field_failed, self.test_value
)
# The exception should be of the form:
# ValidationError({
@@ -215,14 +229,14 @@ class ValidateCompound(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(set(exc.error_dict.keys()), {'int'})
self.assertEqual(set(exc.error_dict.keys()), {"int"})
int_errors = exc.error_dict['int']
int_errors = exc.error_dict["int"]
self.assertIsInstance(int_errors, list)
self.assertEqual(len(int_errors), 1)
nested = int_errors[0]
self.assertIsInstance(nested, ValidationError)
self.assertEqual(nested.code, 'field-validation-failed')
self.assertEqual(nested.params['validator'], 'validate_never')
self.assertEqual(nested.params['field'], 'int')
self.assertEqual(nested.code, "field-validation-failed")
self.assertEqual(nested.params["validator"], "validate_never")
self.assertEqual(nested.params["field"], "int")
@@ -12,20 +12,20 @@ from swh.model.fields import hashes
class ValidateHashes(unittest.TestCase):
def setUp(self):
self.valid_byte_hashes = {
'sha1': b'\xf1\xd2\xd2\xf9\x24\xe9\x86\xac\x86\xfd\xf7\xb3\x6c\x94'
b'\xbc\xdf\x32\xbe\xec\x15',
'sha1_git': b'\x25\x7c\xc5\x64\x2c\xb1\xa0\x54\xf0\x8c\xc8\x3f\x2d'
b'\x94\x3e\x56\xfd\x3e\xbe\x99',
'sha256': b'\xb5\xbb\x9d\x80\x14\xa0\xf9\xb1\xd6\x1e\x21\xe7\x96'
b'\xd7\x8d\xcc\xdf\x13\x52\xf2\x3c\xd3\x28\x12\xf4\x85'
b'\x0b\x87\x8a\xe4\x94\x4c',
"sha1": b"\xf1\xd2\xd2\xf9\x24\xe9\x86\xac\x86\xfd\xf7\xb3\x6c\x94"
b"\xbc\xdf\x32\xbe\xec\x15",
"sha1_git": b"\x25\x7c\xc5\x64\x2c\xb1\xa0\x54\xf0\x8c\xc8\x3f\x2d"
b"\x94\x3e\x56\xfd\x3e\xbe\x99",
"sha256": b"\xb5\xbb\x9d\x80\x14\xa0\xf9\xb1\xd6\x1e\x21\xe7\x96"
b"\xd7\x8d\xcc\xdf\x13\x52\xf2\x3c\xd3\x28\x12\xf4\x85"
b"\x0b\x87\x8a\xe4\x94\x4c",
}
self.valid_str_hashes = {
'sha1': 'f1d2d2f924e986ac86fdf7b36c94bcdf32beec15',
'sha1_git': '257cc5642cb1a054f08cc83f2d943e56fd3ebe99',
'sha256': 'b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f485'
'0b878ae4944c',
"sha1": "f1d2d2f924e986ac86fdf7b36c94bcdf32beec15",
"sha1_git": "257cc5642cb1a054f08cc83f2d943e56fd3ebe99",
"sha256": "b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f485"
"0b878ae4944c",
}
self.bad_hash = object()
@@ -39,112 +39,108 @@ class ValidateHashes(unittest.TestCase):
self.assertTrue(hashes.validate_hash(value, hash_type))
def test_invalid_hash_type(self):
hash_type = 'unknown_hash_type'
hash_type = "unknown_hash_type"
with self.assertRaises(ValidationError) as cm:
hashes.validate_hash(self.valid_str_hashes['sha1'], hash_type)
hashes.validate_hash(self.valid_str_hashes["sha1"], hash_type)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-hash-type')
self.assertEqual(exc.params['hash_type'], hash_type)
self.assertEqual(exc.code, "unexpected-hash-type")
self.assertEqual(exc.params["hash_type"], hash_type)
self.assertIn('Unexpected hash type', str(exc))
self.assertIn("Unexpected hash type", str(exc))
self.assertIn(hash_type, str(exc))
def test_invalid_bytes_len(self):
for hash_type, value in self.valid_byte_hashes.items():
value = value + b'\x00\x01'
value = value + b"\x00\x01"
with self.assertRaises(ValidationError) as cm:
hashes.validate_hash(value, hash_type)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-hash-length')
self.assertEqual(exc.params['hash_type'], hash_type)
self.assertEqual(exc.params['length'], len(value))
self.assertEqual(exc.code, "unexpected-hash-length")
self.assertEqual(exc.params["hash_type"], hash_type)
self.assertEqual(exc.params["length"], len(value))
self.assertIn('Unexpected length', str(exc))
self.assertIn("Unexpected length", str(exc))
self.assertIn(str(len(value)), str(exc))
def test_invalid_str_len(self):
for hash_type, value in self.valid_str_hashes.items():
value = value + '0001'
value = value + "0001"
with self.assertRaises(ValidationError) as cm:
hashes.validate_hash(value, hash_type)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-hash-length')
self.assertEqual(exc.params['hash_type'], hash_type)
self.assertEqual(exc.params['length'], len(value))
self.assertEqual(exc.code, "unexpected-hash-length")
self.assertEqual(exc.params["hash_type"], hash_type)
self.assertEqual(exc.params["length"], len(value))
self.assertIn('Unexpected length', str(exc))
self.assertIn("Unexpected length", str(exc))
self.assertIn(str(len(value)), str(exc))
def test_invalid_str_contents(self):
for hash_type, value in self.valid_str_hashes.items():
value = '\xa2' + value[1:-1] + '\xc3'
value = "\xa2" + value[1:-1] + "\xc3"
with self.assertRaises(ValidationError) as cm:
hashes.validate_hash(value, hash_type)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-hash-contents')
self.assertEqual(exc.params['hash_type'], hash_type)
self.assertEqual(exc.params['unexpected_chars'], '\xa2, \xc3')
self.assertEqual(exc.code, "unexpected-hash-contents")
self.assertEqual(exc.params["hash_type"], hash_type)
self.assertEqual(exc.params["unexpected_chars"], "\xa2, \xc3")
self.assertIn('Unexpected characters', str(exc))
self.assertIn('\xc3', str(exc))
self.assertIn('\xa2', str(exc))
self.assertIn("Unexpected characters", str(exc))
self.assertIn("\xc3", str(exc))
self.assertIn("\xa2", str(exc))
def test_invalid_value_type(self):
with self.assertRaises(ValidationError) as cm:
hashes.validate_hash(self.bad_hash, 'sha1')
hashes.validate_hash(self.bad_hash, "sha1")
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-hash-value-type')
self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__)
self.assertEqual(exc.code, "unexpected-hash-value-type")
self.assertEqual(exc.params["type"], self.bad_hash.__class__.__name__)
self.assertIn('Unexpected type', str(exc))
self.assertIn("Unexpected type", str(exc))
self.assertIn(self.bad_hash.__class__.__name__, str(exc))
def test_validate_sha1(self):
self.assertTrue(hashes.validate_sha1(self.valid_byte_hashes['sha1']))
self.assertTrue(hashes.validate_sha1(self.valid_str_hashes['sha1']))
self.assertTrue(hashes.validate_sha1(self.valid_byte_hashes["sha1"]))
self.assertTrue(hashes.validate_sha1(self.valid_str_hashes["sha1"]))
with self.assertRaises(ValidationError) as cm:
hashes.validate_sha1(self.bad_hash)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-hash-value-type')
self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__)
self.assertEqual(exc.code, "unexpected-hash-value-type")
self.assertEqual(exc.params["type"], self.bad_hash.__class__.__name__)
def test_validate_sha1_git(self):
self.assertTrue(
hashes.validate_sha1_git(self.valid_byte_hashes['sha1_git']))
self.assertTrue(
hashes.validate_sha1_git(self.valid_str_hashes['sha1_git']))
self.assertTrue(hashes.validate_sha1_git(self.valid_byte_hashes["sha1_git"]))
self.assertTrue(hashes.validate_sha1_git(self.valid_str_hashes["sha1_git"]))
with self.assertRaises(ValidationError) as cm:
hashes.validate_sha1_git(self.bad_hash)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-hash-value-type')
self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__)
self.assertEqual(exc.code, "unexpected-hash-value-type")
self.assertEqual(exc.params["type"], self.bad_hash.__class__.__name__)
def test_validate_sha256(self):
self.assertTrue(
hashes.validate_sha256(self.valid_byte_hashes['sha256']))
self.assertTrue(
hashes.validate_sha256(self.valid_str_hashes['sha256']))
self.assertTrue(hashes.validate_sha256(self.valid_byte_hashes["sha256"]))
self.assertTrue(hashes.validate_sha256(self.valid_str_hashes["sha256"]))
with self.assertRaises(ValidationError) as cm:
hashes.validate_sha256(self.bad_hash)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-hash-value-type')
self.assertEqual(exc.params['type'], self.bad_hash.__class__.__name__)
self.assertEqual(exc.code, "unexpected-hash-value-type")
self.assertEqual(exc.params["type"], self.bad_hash.__class__.__name__)
@@ -12,19 +12,20 @@ from swh.model.fields import simple
class ValidateSimple(unittest.TestCase):
def setUp(self):
self.valid_str = 'I am a valid string'
self.valid_str = "I am a valid string"
self.valid_bytes = b'I am a valid bytes object'
self.valid_bytes = b"I am a valid bytes object"
self.enum_values = {'an enum value', 'other', 'and another'}
self.invalid_enum_value = 'invalid enum value'
self.enum_values = {"an enum value", "other", "and another"}
self.invalid_enum_value = "invalid enum value"
self.valid_int = 42
self.valid_real = 42.42
self.valid_datetime = datetime.datetime(1999, 1, 1, 12, 0, 0,
tzinfo=datetime.timezone.utc)
self.valid_datetime = datetime.datetime(
1999, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc
)
self.invalid_datetime_notz = datetime.datetime(1999, 1, 1, 12, 0, 0)
def test_validate_int(self):
@@ -36,9 +37,9 @@ class ValidateSimple(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-type')
self.assertEqual(exc.params['expected_type'], 'Integral')
self.assertEqual(exc.params['type'], 'str')
self.assertEqual(exc.code, "unexpected-type")
self.assertEqual(exc.params["expected_type"], "Integral")
self.assertEqual(exc.params["type"], "str")
def test_validate_str(self):
self.assertTrue(simple.validate_str(self.valid_str))
@@ -49,18 +50,18 @@ class ValidateSimple(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-type')
self.assertEqual(exc.params['expected_type'], 'str')
self.assertEqual(exc.params['type'], 'int')
self.assertEqual(exc.code, "unexpected-type")
self.assertEqual(exc.params["expected_type"], "str")
self.assertEqual(exc.params["type"], "int")
with self.assertRaises(ValidationError) as cm:
simple.validate_str(self.valid_bytes)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-type')
self.assertEqual(exc.params['expected_type'], 'str')
self.assertEqual(exc.params['type'], 'bytes')
self.assertEqual(exc.code, "unexpected-type")
self.assertEqual(exc.params["expected_type"], "str")
self.assertEqual(exc.params["type"], "bytes")
def test_validate_bytes(self):
self.assertTrue(simple.validate_bytes(self.valid_bytes))
@@ -71,18 +72,18 @@ class ValidateSimple(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-type')
self.assertEqual(exc.params['expected_type'], 'bytes')
self.assertEqual(exc.params['type'], 'int')
self.assertEqual(exc.code, "unexpected-type")
self.assertEqual(exc.params["expected_type"], "bytes")
self.assertEqual(exc.params["type"], "int")
with self.assertRaises(ValidationError) as cm:
simple.validate_bytes(self.valid_str)
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-type')
self.assertEqual(exc.params['expected_type'], 'bytes')
self.assertEqual(exc.params['type'], 'str')
self.assertEqual(exc.code, "unexpected-type")
self.assertEqual(exc.params["expected_type"], "bytes")
self.assertEqual(exc.params["type"], "str")
def test_validate_datetime(self):
self.assertTrue(simple.validate_datetime(self.valid_datetime))
@@ -95,9 +96,9 @@ class ValidateSimple(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-type')
self.assertEqual(exc.params['expected_type'], 'one of datetime, Real')
self.assertEqual(exc.params['type'], 'str')
self.assertEqual(exc.code, "unexpected-type")
self.assertEqual(exc.params["expected_type"], "one of datetime, Real")
self.assertEqual(exc.params["type"], "str")
def test_validate_datetime_invalide_tz(self):
with self.assertRaises(ValidationError) as cm:
@@ -105,7 +106,7 @@ class ValidateSimple(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'datetime-without-tzinfo')
self.assertEqual(exc.code, "datetime-without-tzinfo")
def test_validate_enum(self):
for value in self.enum_values:
@@ -117,7 +118,8 @@ class ValidateSimple(unittest.TestCase):
exc = cm.exception
self.assertIsInstance(str(exc), str)
self.assertEqual(exc.code, 'unexpected-value')
self.assertEqual(exc.params['value'], self.invalid_enum_value)
self.assertEqual(exc.params['expected_values'],
', '.join(sorted(self.enum_values)))
self.assertEqual(exc.code, "unexpected-value")
self.assertEqual(exc.params["value"], self.invalid_enum_value)
self.assertEqual(
exc.params["expected_values"], ", ".join(sorted(self.enum_values))
)
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime
from random import choice, randint, random, shuffle
from typing import Dict, List
from pytz import all_timezones, timezone
from swh.model.hashutil import MultiHash
PROTOCOLS = ["git", "http", "https", "deb", "svn", "mock"]
DOMAINS = ["example.com", "some.long.host.name", "xn--n28h.tld"]
PATHS = [
"",
"/",
"/stuff",
"/stuff/",
"/path/to/resource",
"/path/with/anchor#id=42",
"/path/with/qargs?q=1&b",
]
CONTENT_STATUS = ["visible", "hidden", "absent"]
MAX_DATE = 3e9 # around 2065
def gen_all_origins():
for protocol in PROTOCOLS:
for domain in DOMAINS:
for urlpath in PATHS:
yield {"url": "%s://%s%s" % (protocol, domain, urlpath)}
ORIGINS = list(gen_all_origins())
def gen_origins(n: int = 100) -> List:
"""Returns a list of n randomly generated origins suitable for using as
Storage.add_origin() argument.
"""
origins = ORIGINS[:]
shuffle(origins)
return origins[:n]
def gen_content():
size = randint(1, 10 * 1024)
data = bytes(randint(0, 255) for i in range(size))
status = choice(CONTENT_STATUS)
h = MultiHash.from_data(data)
ctime = datetime.fromtimestamp(random() * MAX_DATE, timezone(choice(all_timezones)))
content = {
"data": data,
"status": status,
"length": size,
"ctime": ctime,
**h.digest(),
}
if status == "absent":
content["reason"] = "why not"
content["data"] = None
return content
def gen_contents(n=20) -> List[Dict]:
"""Returns a list of n randomly generated content objects (as dict) suitable
for using as Storage.content_add() argument.
"""
return [gen_content() for i in range(n)]
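These generators are intended to feed storage tests, as the docstrings above suggest. A minimal usage sketch follows; the storage object and its origin_add/content_add methods are assumptions made for illustration and are not part of this change:

def populate_storage(storage):
    # Hypothetical helper, illustration only: push random test data into a
    # storage-like object. The method names below are assumed, not shown here.
    origins = gen_origins(n=10)    # 10 shuffled origin dicts
    contents = gen_contents(n=5)   # 5 random content dicts (some may be "absent")
    storage.origin_add(origins)
    storage.content_add(contents)
    return origins, contents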
@@ -7,86 +7,86 @@ from operator import itemgetter
import os
import sys
from swh.model.from_disk import Directory, DentryPerms
from swh.model.from_disk import DentryPerms, Directory
from swh.model.hashutil import ALGORITHMS, hash_to_hex
def generate_from_directory(varname, directory, indent=0):
"""Generate test data from a given directory"""
def get_data(member, path):
yield (path, member.get_data())
if isinstance(member, Directory):
for name, child in member.items():
yield from get_data(child, os.path.join(path, name))
data = dict(get_data(directory, b''))
data = dict(get_data(directory, b""))
out = []
def format_hash(h, indent=0):
spindent = ' ' * indent
spindent = " " * indent
if len(h) > 20:
cutoff = len(h)//2
cutoff = len(h) // 2
parts = h[:cutoff], h[cutoff:]
else:
parts = [h]
out.append('hash_to_bytes(\n')
out.append("hash_to_bytes(\n")
for part in parts:
out.append(spindent + ' %s\n' % repr(hash_to_hex(part)))
out.append(spindent + ')')
out.append(spindent + " %s\n" % repr(hash_to_hex(part)))
out.append(spindent + ")")
def format_dict_items(d, indent=0):
spindent = ' ' * indent
spindent = " " * indent
for key, value in sorted(d.items()):
if isinstance(key, bytes):
out.append(spindent + repr(key) + ': {\n')
out.append(spindent + repr(key) + ": {\n")
format_dict_items(value, indent=indent + 4)
out.append(spindent + '}')
out.append(spindent + "}")
else:
out.append(spindent + repr(key) + ': ')
if key == 'entries':
out.append(spindent + repr(key) + ": ")
if key == "entries":
if not value:
out.append('[]')
out.append("[]")
else:
out.append('[')
out.append("[")
last_index = len(value) - 1
for i, entry in enumerate(
sorted(value, key=itemgetter('name'))):
sorted(value, key=itemgetter("name"))
):
if i:
out.append(' ')
out.append('{\n')
out.append(" ")
out.append("{\n")
format_dict_items(entry, indent=indent + 4)
if i != last_index:
out.append(spindent + '},')
out.append(spindent + '}]')
elif key in ALGORITHMS | {'id', 'target'}:
out.append(spindent + "},")
out.append(spindent + "}]")
elif key in ALGORITHMS | {"id", "target"}:
format_hash(value, indent=indent)
elif isinstance(value, DentryPerms):
out.append(str(value))
else:
out.append(repr(value))
out.append(',\n')
out.append(",\n")
spindent = ' ' * indent
out.append(spindent + '%s = {\n' % varname)
spindent = " " * indent
out.append(spindent + "%s = {\n" % varname)
format_dict_items(data, indent=4 + indent)
out.append(spindent + '}')
out.append(spindent + "}")
return ''.join(out)
return "".join(out)
if __name__ == '__main__':
if __name__ == "__main__":
if not sys.argv[1:]:
print("Usage: %s dir1 dir2" % sys.argv[0], file=sys.stderr)
exit(2)
for dirname in sys.argv[1:]:
basename = os.path.basename(dirname)
varname = 'expected_%s' % basename
varname = "expected_%s" % basename
testdata = generate_from_directory(
varname,
Directory.from_disk(path=os.fsencode(dirname)),
indent=8
varname, Directory.from_disk(path=os.fsencode(dirname)), indent=8
)
print(testdata)
print()
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
from typing import Dict, List, Sequence, cast
import attr
from swh.model.hashutil import MultiHash, hash_to_bytes
from swh.model.model import (
BaseModel,
Content,
Directory,
DirectoryEntry,
ExtID,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
ModelObjectType,
ObjectType,
Origin,
OriginVisit,
OriginVisitStatus,
Person,
RawExtrinsicMetadata,
Release,
Revision,
RevisionType,
SkippedContent,
Snapshot,
SnapshotBranch,
SnapshotTargetType,
Timestamp,
TimestampWithTimezone,
)
from swh.model.swhids import ExtendedSWHID
UTC = datetime.timezone.utc
CONTENTS: List[Content] = [
Content(
length=4,
data=f"foo{i}".encode(),
status="visible",
**MultiHash.from_data(f"foo{i}".encode()).digest(),
)
for i in range(10)
] + [
Content(
length=14,
data=f"forbidden foo{i}".encode(),
status="hidden",
**MultiHash.from_data(f"forbidden foo{i}".encode()).digest(),
)
for i in range(10)
]
SKIPPED_CONTENTS: List[SkippedContent] = [
SkippedContent(
length=4,
status="absent",
reason=f"because chr({i}) != '*'",
**MultiHash.from_data(f"bar{i}".encode()).digest(),
)
for i in range(2)
]
duplicate_content1 = Content(
length=4,
sha1=hash_to_bytes("44973274ccef6ab4dfaaf86599792fa9c3fe4689"),
sha1_git=b"another-foo",
blake2s256=b"another-bar",
sha256=b"another-baz",
status="visible",
)
# Craft a sha1 collision
sha1_array = bytearray(duplicate_content1.sha1_git)
sha1_array[0] += 1
duplicate_content2 = attr.evolve(duplicate_content1, sha1_git=bytes(sha1_array))
DUPLICATE_CONTENTS = [duplicate_content1, duplicate_content2]
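The two contents above collide on sha1 while remaining distinct objects, since attr.evolve only changes sha1_git. A tiny illustrative check, not part of the module, makes that intent explicit:

# Illustration only: DUPLICATE_CONTENTS share their sha1 but differ on sha1_git.
assert duplicate_content1.sha1 == duplicate_content2.sha1
assert duplicate_content1.sha1_git != duplicate_content2.sha1_git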
COMMITTERS: List[Person] = [
Person(fullname=b"foo", name=b"foo", email=b""),
Person(fullname=b"bar", name=b"bar", email=b""),
]
DATES: List[TimestampWithTimezone] = [
TimestampWithTimezone(
timestamp=Timestamp(
seconds=1234567891,
microseconds=0,
),
offset_bytes=b"+0200",
),
TimestampWithTimezone(
timestamp=Timestamp(
seconds=1234567892,
microseconds=0,
),
offset_bytes=b"+0200",
),
]
REVISIONS: List[Revision] = [
Revision(
id=hash_to_bytes("66c7c1cd9673275037140f2abff7b7b11fc9439c"),
message=b"hello",
date=DATES[0],
committer=COMMITTERS[0],
author=COMMITTERS[0],
committer_date=DATES[0],
type=RevisionType.GIT,
directory=b"\x01" * 20,
synthetic=False,
metadata=None,
parents=(
hash_to_bytes("9b918dd063cec85c2bc63cc7f167e29f5894dcbc"),
hash_to_bytes("757f38bdcd8473aaa12df55357f5e2f1a318e672"),
),
),
Revision(
id=hash_to_bytes("c7f96242d73c267adc77c2908e64e0c1cb6a4431"),
message=b"hello again",
date=DATES[1],
committer=COMMITTERS[1],
author=COMMITTERS[1],
committer_date=DATES[1],
type=RevisionType.MERCURIAL,
directory=b"\x02" * 20,
synthetic=False,
metadata=None,
parents=(),
extra_headers=((b"foo", b"bar"),),
),
Revision(
id=hash_to_bytes("51580d63b8dcc0ec73e74994e66896858542840a"),
message=b"hello",
date=DATES[0],
committer=COMMITTERS[0],
author=COMMITTERS[0],
committer_date=DATES[0],
type=RevisionType.GIT,
directory=b"\x01" * 20,
synthetic=False,
metadata=None,
parents=(hash_to_bytes("9b918dd063cec85c2bc63cc7f167e29f5894dcbc"),),
raw_manifest=(
b"commit 207\x00"
b"tree 0101010101010101010101010101010101010101\n"
b"parent 9B918DD063CEC85C2BC63CC7F167E29F5894DCBC" # upper-cased
b"nauthor foo 1234567891 +0200\n"
b"committer foo 1234567891 +0200"
b"\n\nhello"
),
),
]
RELEASES: List[Release] = [
Release(
id=hash_to_bytes("8059dc4e17fcd0e51ca3bcd6b80f4577d281fd08"),
name=b"v0.0.1",
date=TimestampWithTimezone(
timestamp=Timestamp(
seconds=1234567890,
microseconds=0,
),
offset_bytes=b"+0200",
),
author=COMMITTERS[0],
target_type=ObjectType.REVISION,
target=b"\x04" * 20,
message=b"foo",
synthetic=False,
),
Release(
id=hash_to_bytes("ee4d20e80af850cc0f417d25dc5073792c5010d2"),
name=b"this-is-a/tag/1.0",
date=None,
author=None,
target_type=ObjectType.DIRECTORY,
target=b"\x05" * 20,
message=b"bar",
synthetic=False,
),
Release(
id=hash_to_bytes("1cdd1e87234b6f066d0855a3b5b567638a55d583"),
name=b"v0.0.1",
date=TimestampWithTimezone(
timestamp=Timestamp(
seconds=1234567890,
microseconds=0,
),
offset_bytes=b"+0200",
),
author=COMMITTERS[0],
target_type=ObjectType.REVISION,
target=b"\x04" * 20,
message=b"foo",
synthetic=False,
raw_manifest=(
b"tag 102\x00"
b"object 0404040404040404040404040404040404040404\n"
b"type commit\n"
b"tag v0.0.1\n"
b"tagger foo 1234567890 +200" # missing leading 0 for timezone
b"\n\nfoo"
),
),
]
ORIGINS: List[Origin] = [
Origin(
url="https://somewhere.org/den/fox",
),
Origin(
url="https://overtherainbow.org/fox/den",
),
]
ORIGIN_VISITS: List[OriginVisit] = [
OriginVisit(
origin=ORIGINS[0].url,
date=datetime.datetime(2013, 5, 7, 4, 20, 39, 369271, tzinfo=UTC),
visit=1,
type="git",
),
OriginVisit(
origin=ORIGINS[1].url,
date=datetime.datetime(2014, 11, 27, 17, 20, 39, tzinfo=UTC),
visit=1,
type="hg",
),
OriginVisit(
origin=ORIGINS[0].url,
date=datetime.datetime(2018, 11, 27, 17, 20, 39, tzinfo=UTC),
visit=2,
type="git",
),
OriginVisit(
origin=ORIGINS[0].url,
date=datetime.datetime(2018, 11, 27, 17, 20, 39, tzinfo=UTC),
visit=3,
type="git",
),
OriginVisit(
origin=ORIGINS[1].url,
date=datetime.datetime(2015, 11, 27, 17, 20, 39, tzinfo=UTC),
visit=2,
type="hg",
),
]
# The origin-visit-status dates need to be shifted slightly into the future relative
# to their visit date counterparts. Otherwise, on the storage side, we hit the
# "on conflict ignore" policy (because origin-visit-add creates an origin-visit-status
# with the same {origin, visit, date} parameters as the origin-visit).
ORIGIN_VISIT_STATUSES: List[OriginVisitStatus] = [
OriginVisitStatus(
origin=ORIGINS[0].url,
date=datetime.datetime(2013, 5, 7, 4, 20, 39, 432222, tzinfo=UTC),
visit=1,
type="git",
status="ongoing",
snapshot=None,
metadata=None,
),
OriginVisitStatus(
origin=ORIGINS[1].url,
date=datetime.datetime(2014, 11, 27, 17, 21, 12, tzinfo=UTC),
visit=1,
type="hg",
status="ongoing",
snapshot=None,
metadata=None,
),
OriginVisitStatus(
origin=ORIGINS[0].url,
date=datetime.datetime(2018, 11, 27, 17, 20, 59, tzinfo=UTC),
visit=2,
type="git",
status="ongoing",
snapshot=None,
metadata=None,
),
OriginVisitStatus(
origin=ORIGINS[0].url,
date=datetime.datetime(2018, 11, 27, 17, 20, 49, tzinfo=UTC),
visit=3,
type="git",
status="full",
snapshot=hash_to_bytes("9e78d7105c5e0f886487511e2a92377b4ee4c32a"),
metadata=None,
),
OriginVisitStatus(
origin=ORIGINS[1].url,
date=datetime.datetime(2015, 11, 27, 17, 22, 18, tzinfo=UTC),
visit=2,
type="hg",
status="partial",
snapshot=hash_to_bytes("0e7f84ede9a254f2cd55649ad5240783f557e65f"),
metadata=None,
),
]
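To make the date-shifting constraint described before the list concrete, here is an illustrative sanity check (a sketch, not part of the module) pairing each status with its visit:

def _check_status_dates():
    # Sketch only: each origin-visit-status above is dated strictly after the
    # matching origin-visit, which avoids the "on conflict ignore" behaviour.
    visit_dates = {(v.origin, v.visit): v.date for v in ORIGIN_VISITS}
    for status in ORIGIN_VISIT_STATUSES:
        assert status.date > visit_dates[(status.origin, status.visit)]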
DIRECTORIES: List[Directory] = [
Directory(id=hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"), entries=()),
Directory(
id=hash_to_bytes("87b339104f7dc2a8163dec988445e3987995545f"),
entries=(
DirectoryEntry(
name=b"file1.ext",
perms=0o644,
type="file",
target=CONTENTS[0].sha1_git,
),
DirectoryEntry(
name=b"dir1",
perms=0o755,
type="dir",
target=hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
),
DirectoryEntry(
name=b"subprepo1",
perms=0o160000,
type="rev",
target=REVISIONS[1].id,
),
),
),
Directory(
id=hash_to_bytes("d135a91ac82a754e7f4bdeff8d56ef06d921eb7d"),
entries=(
DirectoryEntry(
name=b"file1.ext",
perms=0o644,
type="file",
target=b"\x11" * 20,
),
),
raw_manifest=(
b"tree 34\x00"
+ b"00644 file1.ext\x00" # added two leading zeros
+ b"\x11" * 20
),
),
]
SNAPSHOTS: List[Snapshot] = [
Snapshot(
id=hash_to_bytes("9e78d7105c5e0f886487511e2a92377b4ee4c32a"),
branches={
b"master": SnapshotBranch(
target_type=SnapshotTargetType.REVISION, target=REVISIONS[0].id
)
},
),
Snapshot(
id=hash_to_bytes("0e7f84ede9a254f2cd55649ad5240783f557e65f"),
branches={
b"target/revision": SnapshotBranch(
target_type=SnapshotTargetType.REVISION,
target=REVISIONS[0].id,
),
b"target/alias": SnapshotBranch(
target_type=SnapshotTargetType.ALIAS, target=b"target/revision"
),
b"target/directory": SnapshotBranch(
target_type=SnapshotTargetType.DIRECTORY,
target=DIRECTORIES[0].id,
),
b"target/release": SnapshotBranch(
target_type=SnapshotTargetType.RELEASE, target=RELEASES[0].id
),
b"target/snapshot": SnapshotBranch(
target_type=SnapshotTargetType.SNAPSHOT,
target=hash_to_bytes("9e78d7105c5e0f886487511e2a92377b4ee4c32a"),
),
},
),
]
METADATA_AUTHORITIES: List[MetadataAuthority] = [
MetadataAuthority(
type=MetadataAuthorityType.FORGE,
url="http://example.org/",
metadata={},
),
]
METADATA_FETCHERS: List[MetadataFetcher] = [
MetadataFetcher(
name="test-fetcher",
version="1.0.0",
metadata={},
)
]
RAW_EXTRINSIC_METADATA: List[RawExtrinsicMetadata] = [
RawExtrinsicMetadata(
target=Origin("http://example.org/foo.git").swhid(),
discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None),
format="json",
metadata=b'{"foo": "bar"}',
),
RawExtrinsicMetadata(
target=ExtendedSWHID.from_string(str(CONTENTS[0].swhid())),
discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None),
format="json",
metadata=b'{"foo": "bar"}',
),
]
EXTIDS: List[ExtID] = [
ExtID(
extid_type="git256",
extid=b"\x03" * 32,
target=REVISIONS[0].swhid(),
),
ExtID(
extid_type="hg",
extid=b"\x04" * 20,
target=REVISIONS[1].swhid(),
),
ExtID(
extid_type="hg-nodeid",
extid=b"\x05" * 20,
target=REVISIONS[1].swhid(),
extid_version=1,
),
ExtID(
extid_type="tarball-sha256",
extid=b"\x03" * 32,
target=DIRECTORIES[0].swhid(),
payload_type="disarchive",
payload=CONTENTS[0].sha1_git,
),
]
TEST_OBJECTS: Dict[ModelObjectType, Sequence[BaseModel]] = {}
# Generate this mapping with code to avoid copy-paste errors
for objects in [
CONTENTS,
DIRECTORIES,
EXTIDS,
METADATA_AUTHORITIES,
METADATA_FETCHERS,
ORIGINS,
ORIGIN_VISITS,
ORIGIN_VISIT_STATUSES,
RAW_EXTRINSIC_METADATA,
RELEASES,
REVISIONS,
SNAPSHOTS,
SKIPPED_CONTENTS,
]:
objects = cast(List[BaseModel], objects)
object_type = objects[0].object_type
assert all(object_type == o.object_type for o in objects)
assert object_type not in TEST_OBJECTS
TEST_OBJECTS[object_type] = objects
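TEST_OBJECTS maps each model object type to its sample objects, so consumers can look samples up by type. For example (an illustrative sketch, assuming the object_type class attribute exposed by swh.model model classes):

# Sketch only: the sample revisions registered above are retrievable by type.
assert TEST_OBJECTS[Revision.object_type] == REVISIONS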
SAMPLE_FOLDER_SWHIDS = [
"swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759",
"swh:1:cnt:7d5c08111e21c8a9f71540939998551683375fad",
"swh:1:cnt:68769579c3eaadbe555379b9c3538e6628bae1eb",
"swh:1:cnt:e86b45e538d9b6888c969c89fbd22a85aa0e0366",
"swh:1:dir:3c1f578394f4623f74a0ba7fe761729f59fc6ec4",
"swh:1:dir:c3020f6bf135a38c6df3afeb5fb38232c5e07087",
"swh:1:cnt:133693b125bad2b4ac318535b84901ebb1f6b638",
"swh:1:dir:4b825dc642cb6eb9a060e54bf8d69288fbee4904",
"swh:1:cnt:19102815663d23f8b75a47e7a01965dcdc96468c",
"swh:1:dir:2b41c40f0d1fbffcba12497db71fba83fcca96e5",
"swh:1:cnt:8185dfb2c0c2c597d16f75a8a0c37668567c3d7e",
"swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a",
"swh:1:cnt:acac326ddd63b0bc70840659d4ac43619484e69f",
]
# Copyright (C) 2018 The Software Heritage developers
# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import sys
import tarfile
import tempfile
import unittest
import unittest.mock
from click.testing import CliRunner
import pytest
from swh.model import cli
from swh.model.hashutil import hash_to_hex
from swh.model.tests.swh_model_data import SAMPLE_FOLDER_SWHIDS
from swh.model.tests.test_from_disk import DataMixin
@pytest.mark.fs
class TestIdentify(DataMixin, unittest.TestCase):
def setUp(self):
super().setUp()
self.runner = CliRunner()
def assertPidOK(self, result, pid): # noqa: N802
self.assertEqual(result.exit_code, 0)
self.assertEqual(result.output.split()[0], pid)
def assertSWHID(self, result, swhid):
self.assertEqual(result.exit_code, 0, result.output)
self.assertEqual(result.output.split()[0], swhid)
def test_no_args(self):
result = self.runner.invoke(cli.identify)
self.assertNotEqual(result.exit_code, 0)
def test_content_id(self):
"""identify file content"""
self.make_contents(self.tmpdir_name)
for filename, content in self.contents.items():
path = os.path.join(self.tmpdir_name, filename)
result = self.runner.invoke(cli.identify,
['--type', 'content', path])
self.assertPidOK(result,
'swh:1:cnt:' + hash_to_hex(content['sha1_git']))
result = self.runner.invoke(cli.identify, ["--type", "content", path])
self.assertSWHID(result, "swh:1:cnt:" + hash_to_hex(content["sha1_git"]))
def test_content_id_from_stdin(self):
"""identify file content"""
self.make_contents(self.tmpdir_name)
for _, content in self.contents.items():
result = self.runner.invoke(cli.identify, ["-"], input=content["data"])
self.assertSWHID(result, "swh:1:cnt:" + hash_to_hex(content["sha1_git"]))
def test_directory_id(self):
"""identify an entire directory"""
self.make_from_tarball(self.tmpdir_name)
path = os.path.join(self.tmpdir_name, b'sample-folder')
result = self.runner.invoke(cli.identify,
['--type', 'directory', path])
self.assertPidOK(result,
'swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759')
path = os.path.join(self.tmpdir_name, b"sample-folder")
result = self.runner.invoke(cli.identify, ["--type", "directory", path])
self.assertSWHID(result, "swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759")
@pytest.mark.requires_optional_deps
def test_snapshot_id(self):
"""identify a snapshot"""
tarball = os.path.join(
os.path.dirname(__file__), "data", "repos", "sample-repo.tgz"
)
with tempfile.TemporaryDirectory(prefix="swh.model.cli") as d:
with tarfile.open(tarball, "r:gz") as t:
t.extractall(d)
repo_dir = os.path.join(d, "sample-repo")
result = self.runner.invoke(
cli.identify, ["--type", "snapshot", repo_dir]
)
self.assertSWHID(
result, "swh:1:snp:abc888898124270905a0ef3c67e872ce08e7e0c1"
)
def test_snapshot_without_dulwich(self):
"""checks swh-identify returns a 'nice' message instead of a traceback
when dulwich is not installed"""
with unittest.mock.patch.dict(sys.modules, {"dulwich": None}):
with tempfile.TemporaryDirectory(prefix="swh.model.cli") as d:
result = self.runner.invoke(
cli.identify,
["--type", "snapshot", d],
catch_exceptions=False,
)
assert result.exit_code == 1
assert "'swh.model[cli]'" in result.output
def test_origin_id(self):
"""identify an origin URL"""
url = "https://github.com/torvalds/linux"
result = self.runner.invoke(cli.identify, ["--type", "origin", url])
self.assertSWHID(result, "swh:1:ori:b63a575fe3faab7692c9f38fb09d4bb45651bb0f")
def test_symlink(self):
"""identify symlink --- both itself and target"""
regular = os.path.join(self.tmpdir_name, b'foo.txt')
link = os.path.join(self.tmpdir_name, b'bar.txt')
open(regular, 'w').write('foo\n')
regular = os.path.join(self.tmpdir_name, b"foo.txt")
link = os.path.join(self.tmpdir_name, b"bar.txt")
with open(regular, "w") as f:
f.write("foo\n")
os.symlink(os.path.basename(regular), link)
result = self.runner.invoke(cli.identify, [link])
self.assertPidOK(result,
'swh:1:cnt:257cc5642cb1a054f08cc83f2d943e56fd3ebe99')
self.assertSWHID(result, "swh:1:cnt:257cc5642cb1a054f08cc83f2d943e56fd3ebe99")
result = self.runner.invoke(cli.identify, ['--no-dereference', link])
self.assertPidOK(result,
'swh:1:cnt:996f1789ff67c0e3f69ef5933a55d54c5d0e9954')
result = self.runner.invoke(cli.identify, ["--no-dereference", link])
self.assertSWHID(result, "swh:1:cnt:996f1789ff67c0e3f69ef5933a55d54c5d0e9954")
def test_show_filename(self):
"""filename is shown by default"""
self.make_contents(self.tmpdir_name)
for filename, content in self.contents.items():
path = os.path.join(self.tmpdir_name, filename)
result = self.runner.invoke(cli.identify,
['--type', 'content', path])
result = self.runner.invoke(cli.identify, ["--type", "content", path])
self.assertEqual(result.exit_code, 0)
self.assertEqual(result.output.rstrip(),
'swh:1:cnt:%s\t%s' %
(hash_to_hex(content['sha1_git']), path.decode()))
self.assertEqual(
result.output.rstrip(),
"swh:1:cnt:%s\t%s" % (hash_to_hex(content["sha1_git"]), path.decode()),
)
def test_hide_filename(self):
"""filename is hidden upon request"""
self.make_contents(self.tmpdir_name)
for filename, content in self.contents.items():
path = os.path.join(self.tmpdir_name, filename)
result = self.runner.invoke(cli.identify,
['--type', 'content', '--no-filename',
path])
self.assertPidOK(result,
'swh:1:cnt:' + hash_to_hex(content['sha1_git']))
def test_auto_id(self):
"""automatic object type: file or directory, depending on argument"""
with tempfile.NamedTemporaryFile(prefix='swh.model.cli') as f:
result = self.runner.invoke(
cli.identify, ["--type", "content", "--no-filename", path]
)
self.assertSWHID(result, "swh:1:cnt:" + hash_to_hex(content["sha1_git"]))
def test_auto_content(self):
"""automatic object type detection: content"""
with tempfile.NamedTemporaryFile(prefix="swh.model.cli") as f:
result = self.runner.invoke(cli.identify, [f.name])
self.assertEqual(result.exit_code, 0)
self.assertRegex(result.output, r'^swh:\d+:cnt:')
self.assertRegex(result.output, r"^swh:\d+:cnt:")
with tempfile.TemporaryDirectory(prefix='swh.model.cli') as dirname:
def test_auto_directory(self):
"""automatic object type detection: directory"""
with tempfile.TemporaryDirectory(prefix="swh.model.cli") as dirname:
result = self.runner.invoke(cli.identify, [dirname])
self.assertEqual(result.exit_code, 0)
self.assertRegex(result.output, r'^swh:\d+:dir:')
self.assertRegex(result.output, r"^swh:\d+:dir:")
def test_auto_origin(self):
"""automatic object type detection: origin"""
result = self.runner.invoke(cli.identify, ["https://github.com/torvalds/linux"])
self.assertEqual(result.exit_code, 0, result.output)
self.assertRegex(result.output, r"^swh:\d+:ori:")
def test_verify_content(self):
"""identifier verification"""
self.make_contents(self.tmpdir_name)
for filename, content in self.contents.items():
expected_id = 'swh:1:cnt:' + hash_to_hex(content['sha1_git'])
expected_id = "swh:1:cnt:" + hash_to_hex(content["sha1_git"])
# match
path = os.path.join(self.tmpdir_name, filename)
result = self.runner.invoke(cli.identify,
['--verify', expected_id, path])
self.assertEqual(result.exit_code, 0)
result = self.runner.invoke(cli.identify, ["--verify", expected_id, path])
self.assertEqual(result.exit_code, 0, result.output)
# mismatch
with open(path, 'a') as f:
f.write('trailing garbage to make verification fail')
result = self.runner.invoke(cli.identify,
['--verify', expected_id, path])
with open(path, "a") as f:
f.write("trailing garbage to make verification fail")
result = self.runner.invoke(cli.identify, ["--verify", expected_id, path])
self.assertEqual(result.exit_code, 1)
def test_exclude(self):
"""exclude patterns"""
self.make_from_tarball(self.tmpdir_name)
path = os.path.join(self.tmpdir_name, b"sample-folder")
excluded_dir = os.path.join(path, b"excluded_dir\x96")
os.mkdir(excluded_dir)
with open(os.path.join(excluded_dir, b"some_file"), "w") as f:
f.write("content")
result = self.runner.invoke(
cli.identify, ["--type", "directory", "--exclude", "excluded_*", path]
)
self.assertSWHID(result, "swh:1:dir:e8b0f1466af8608c8a3fb9879db172b887e80759")
def test_recursive_directory(self):
self.make_from_tarball(self.tmpdir_name)
path = os.path.join(self.tmpdir_name, b"sample-folder")
result = self.runner.invoke(cli.identify, ["--recursive", path])
self.assertEqual(result.exit_code, 0, result.output)
result = result.output.split()
result_swhids = []
# get all SWHIDs from the result (output tokens alternate SWHID, filename)
for i in range(0, len(result)):
if i % 2 == 0:
result_swhids.append(result[i])
assert len(result_swhids) == len(SAMPLE_FOLDER_SWHIDS)
for swhid in SAMPLE_FOLDER_SWHIDS:
assert swhid in result_swhids
def test_recursive_directory_no_filename(self):
self.make_from_tarball(self.tmpdir_name)
path = os.path.join(self.tmpdir_name, b"sample-folder")
result = self.runner.invoke(
cli.identify, ["--recursive", "--no-filename", path]
)
self.assertEqual(result.exit_code, 0, result.output)
result_swhids = result.output.split()
assert len(result_swhids) == len(SAMPLE_FOLDER_SWHIDS)
for swhid in SAMPLE_FOLDER_SWHIDS:
assert swhid in result_swhids
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
from swh.model.collections import ImmutableDict
def test_immutabledict_empty():
d = ImmutableDict()
assert d == {}
assert d != {"foo": "bar"}
assert list(d) == []
assert list(d.items()) == []
def test_immutabledict_one_item():
d = ImmutableDict({"foo": "bar"})
assert d == {"foo": "bar"}
assert d != {}
assert d["foo"] == "bar"
with pytest.raises(KeyError, match="bar"):
d["bar"]
assert list(d) == ["foo"]
assert list(d.items()) == [("foo", "bar")]
def test_immutabledict_from_iterable():
d1 = ImmutableDict()
d2 = ImmutableDict({"foo": "bar"})
assert ImmutableDict([]) == d1
assert ImmutableDict([("foo", "bar")]) == d2
def test_immutabledict_from_immutabledict():
d1 = ImmutableDict()
d2 = ImmutableDict({"foo": "bar"})
assert ImmutableDict(d1) == d1
assert ImmutableDict(d2) == d2
def test_immutabledict_immutable():
d = ImmutableDict({"foo": "bar"})
with pytest.raises(TypeError, match="item assignment"):
d["bar"] = "baz"
with pytest.raises(TypeError, match="item deletion"):
del d["foo"]
def test_immutabledict_copy_pop():
d = ImmutableDict({"foo": "bar", "baz": "qux"})
assert d.copy_pop("foo") == ("bar", ImmutableDict({"baz": "qux"}))
assert d.copy_pop("not a key") == (None, d)
def test_hash():
assert hash(ImmutableDict()) == hash(ImmutableDict({}))
assert hash(ImmutableDict({"foo": "bar"})) == hash(ImmutableDict({"foo": "bar"}))
assert hash(ImmutableDict({"foo": "bar", "baz": "qux"})) == hash(
ImmutableDict({"foo": "bar", "baz": "qux"})
)
assert hash(ImmutableDict({"foo": "bar", "baz": "qux"})) == hash(
ImmutableDict({"baz": "qux", "foo": "bar"})
)
def test_equality_order():
assert ImmutableDict({"foo": "bar", "baz": "qux"}) == ImmutableDict(
{"foo": "bar", "baz": "qux"}
)
assert ImmutableDict({"foo": "bar", "baz": "qux"}) == ImmutableDict(
{"baz": "qux", "foo": "bar"}
)
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from dataclasses import dataclass
from typing import Iterable, List
from swh.model import discovery, model
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
from swh.model.tests.test_identifiers import directory_example
pytest_plugins = ["aiohttp.pytest_plugin"]
UNKNOWN_HASH = hash_to_bytes("17140cb6109f1e3296dc52e2b2cd29bcb40e86be")
KNOWN_CONTENT_HASH = hash_to_bytes("e8e4106de42e2d5d5efab6a9422b9a8677c993c8")
KNOWN_DIRECTORY_HASH = hash_to_bytes("d7ed3d2c31d608823be58b1cbe57605310615231")
KNOWN_DIRECTORY_HASH_2 = hash_to_bytes("c76724e9a0be4b60f4bf0cb48b261df8eda94b1d")
@dataclass
class FakeArchive:
contents: List[model.Content]
skipped_contents: List[model.SkippedContent]
directories: List[model.Directory]
def content_missing(self, contents: List[Sha1Git]) -> Iterable[Sha1Git]:
return []
def skipped_content_missing(
self, skipped_contents: List[Sha1Git]
) -> Iterable[Sha1Git]:
"""List skipped content missing from the archive by sha1"""
return []
def directory_missing(self, directories: List[Sha1Git]) -> Iterable[Sha1Git]:
"""List directories missing from the archive by sha1"""
return []
def test_filter_known_objects(monkeypatch):
# Test with smaller sample sizes to actually trigger the random sampling
monkeypatch.setattr(discovery, "SAMPLE_SIZE", 1)
base_directory = model.Directory.from_dict(directory_example)
# Hardcoding another hash is enough since it's all that's being checked
directory_data = directory_example.copy()
directory_data["id"] = KNOWN_DIRECTORY_HASH_2
other_directory = model.Directory.from_dict(directory_data)
archive = FakeArchive(
contents=[model.Content.from_data(b"blabla")],
skipped_contents=[model.SkippedContent.from_data(b"blabla2", reason="reason")],
directories=[
base_directory,
other_directory,
],
)
assert archive.contents[0].sha1_git == KNOWN_CONTENT_HASH
assert archive.directories[0].id == KNOWN_DIRECTORY_HASH
assert archive.directories[1].id == KNOWN_DIRECTORY_HASH_2
(contents, skipped_contents, directories) = discovery.filter_known_objects(archive)
assert len(contents) == 0
assert len(skipped_contents) == 0
assert len(directories) == 0
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from collections import defaultdict
from functools import partial
import os
import tarfile
import tempfile
from typing import ClassVar, Optional
import unittest
import pytest
from swh.model import from_disk
from swh.model.from_disk import Content, DentryPerms, Directory
from swh.model import from_disk, model
from swh.model.from_disk import (
Content,
DentryPerms,
Directory,
DiskBackedData,
FromDiskType,
)
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
TEST_DATA = os.path.join(os.path.dirname(__file__), 'data')
TEST_DATA = os.path.join(os.path.dirname(__file__), "data")
def mk_tree(root: bytes, tree_desc: bytes):
"""Create a directory tree under `root` with content generated from `tree_desc`
tree_desc is a simple textual representation of the tree structure; each
line is one element of the directory tree: a trailing '/' defines a
directory, otherwise it is an (empty) file; a symlink is specified with a
' -> path' suffix in the description. If the link target starts with a slash
('/') it is considered absolute, i.e. relative to the 'root' directory; e.g.
foo/bar/baz.txt
foo/baz/
foo/bar/toto -> baz.txt
foo/abstoto -> /foo/bar/baz.txt
will generate a directory structure like:
.
└── foo
├── abstoto -> bar/baz.txt
├── bar
│ ├── baz.txt
│ └── toto -> baz.txt
└── baz
The root directory must already exist.
"""
if not os.path.isdir(root):
raise EnvironmentError("The root directory must exists and be writable")
symlinks = []
for entry in tree_desc.splitlines():
entry = entry.strip()
if not entry or entry.startswith(b"#"):
continue
entry = entry.strip().lstrip(b"/")
if b".." in entry:
raise ValueError(".. in path descr is forbidden...")
if b"->" in entry:
dst, src = entry.split(b"->")
symlinks.append((src.strip(), dst.strip()))
continue
path = os.path.join(root, entry)
if entry.endswith(b"/"):
os.makedirs(path, exist_ok=True)
else:
dirname = os.path.dirname(path)
os.makedirs(dirname, exist_ok=True)
open(path, "a")
# now create symlinks
while symlinks:
src, dst = symlinks.pop(0)
fp_dst = os.path.join(root, dst)
if src.startswith(b"/"):
rp_src = src.lstrip(b"/")
else:
rp_src = os.path.join(os.path.dirname(dst), src)
fp_src = os.path.join(root, rp_src)
if not os.path.exists(fp_src):
symlinks.append((src, dst))
continue
# create the parent directory of the dst, if need be
dirname = os.path.dirname(fp_dst)
os.makedirs(dirname, exist_ok=True)
rp_src = os.path.relpath(fp_src, os.path.dirname(fp_dst))
os.symlink(rp_src, fp_dst)
def test_mk_tree(tmpdir):
desc = b"""
foo/bar/baz.txt
foo/baz/
foo/bar/toto -> baz.txt
foo/abstoto -> /foo/bar/baz.txt
baz/baz/baz/
# prefix / is ignored
/bar/a_file.txt
# symlink to a not yet defined target is ok
bar/baz/lnk -> /foo/bar/later.txt
foo/bar/later.txt
# symlink to another symlink is ok
bar/baz/lnk2 -> /foo/bar/toto
# even if the src of the symlink is defined after the dst
bar/baz/lnk3 -> /foo/bar/toto2
foo/bar/toto2 -> later.txt
"""
from os.path import isdir, isfile, islink, realpath
join = partial(os.path.join, tmpdir)
mk_tree(os.fsencode(tmpdir), desc)
assert isfile(join("foo/bar/baz.txt"))
assert isfile(join("foo/bar/later.txt"))
assert isfile(join("bar/a_file.txt"))
assert isdir(join("baz/baz/baz"))
assert islink(join("foo/bar/toto"))
assert realpath(join("foo/bar/toto")) == join("foo/bar/baz.txt")
assert islink(join("foo/bar/toto2"))
assert realpath(join("foo/bar/toto2")) == join("foo/bar/later.txt")
assert islink(join("foo/abstoto"))
assert realpath(join("foo/abstoto")) == join("foo/bar/baz.txt")
assert islink(join("bar/baz/lnk"))
assert realpath(join("bar/baz/lnk")) == join("foo/bar/later.txt")
assert islink(join("bar/baz/lnk2"))
assert realpath(join("bar/baz/lnk2")) == join("foo/bar/baz.txt")
assert islink(join("bar/baz/lnk3"))
assert realpath(join("bar/baz/lnk3")) == join("foo/bar/later.txt")
class ModeToPerms(unittest.TestCase):
@@ -47,353 +170,399 @@ class ModeToPerms(unittest.TestCase):
self.assertEqual(perm, from_disk.mode_to_perms(fmode))
class TestDiskBackedContent(unittest.TestCase):
def test_with_data(self):
expected_content = model.Content(
length=42,
status="visible",
data=b"foo bar",
sha1=b"foo",
sha1_git=b"bar",
sha256=b"baz",
blake2s256=b"qux",
)
with tempfile.NamedTemporaryFile(mode="w+b") as fd:
content = model.Content(
length=42,
status="visible",
get_data=DiskBackedData(path=fd.name),
sha1=b"foo",
sha1_git=b"bar",
sha256=b"baz",
blake2s256=b"qux",
)
fd.write(b"foo bar")
fd.seek(0)
content_with_data = content.with_data()
assert content.to_dict() == content_with_data.to_dict()
assert expected_content == content_with_data
assert expected_content.to_dict() == content_with_data.to_dict()
def test_lazy_data(self):
with tempfile.NamedTemporaryFile(mode="w+b") as fd:
fd.write(b"foo")
fd.seek(0)
content = model.Content(
length=42,
status="visible",
get_data=DiskBackedData(path=fd.name),
sha1=b"foo",
sha1_git=b"bar",
sha256=b"baz",
blake2s256=b"qux",
)
fd.write(b"bar")
fd.seek(0)
content_with_data = content.with_data()
fd.write(b"baz")
fd.seek(0)
assert content_with_data.data == b"bar"
def test_with_data_cannot_read(self):
with tempfile.NamedTemporaryFile(mode="w+b") as fd:
content = model.Content(
length=42,
status="visible",
get_data=DiskBackedData(path=fd.name),
sha1=b"foo",
sha1_git=b"bar",
sha256=b"baz",
blake2s256=b"qux",
)
with pytest.raises(OSError):
content.with_data()
def test_missing_path(self):
with pytest.raises(model.MissingData):
c = model.Content(
length=42,
status="visible",
sha1=b"foo",
sha1_git=b"bar",
sha256=b"baz",
blake2s256=b"qux",
)
c.with_data()
with pytest.raises(model.MissingData):
c = model.Content(
length=42,
status="visible",
get_data=lambda: None,
sha1=b"foo",
sha1_git=b"bar",
sha256=b"baz",
blake2s256=b"qux",
)
c.with_data()
class DataMixin:
maxDiff = None
maxDiff: ClassVar[Optional[int]] = None
def setUp(self):
self.tmpdir = tempfile.TemporaryDirectory(
prefix='swh.model.from_disk'
)
self.tmpdir = tempfile.TemporaryDirectory(prefix="swh.model.from_disk")
self.tmpdir_name = os.fsencode(self.tmpdir.name)
self.contents = {
b'file': {
'data': b'42\n',
'sha1': hash_to_bytes(
'34973274ccef6ab4dfaaf86599792fa9c3fe4689'
),
'sha256': hash_to_bytes(
'084c799cd551dd1d8d5c5f9a5d593b2e'
'931f5e36122ee5c793c1d08a19839cc0'
b"file": {
"data": b"42\n",
"sha1": hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"),
"sha256": hash_to_bytes(
"084c799cd551dd1d8d5c5f9a5d593b2e"
"931f5e36122ee5c793c1d08a19839cc0"
),
'sha1_git': hash_to_bytes(
'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'),
'blake2s256': hash_to_bytes(
'd5fe1939576527e42cfd76a9455a2432'
'fe7f56669564577dd93c4280e76d661d'
"sha1_git": hash_to_bytes("d81cc0710eb6cf9efd5b920a8453e1e07157b6cd"),
"blake2s256": hash_to_bytes(
"d5fe1939576527e42cfd76a9455a2432"
"fe7f56669564577dd93c4280e76d661d"
),
'length': 3,
'mode': 0o100644
"length": 3,
"mode": 0o100644,
},
}
self.symlinks = {
b'symlink': {
'data': b'target',
'blake2s256': hash_to_bytes(
'595d221b30fdd8e10e2fdf18376e688e'
'9f18d56fd9b6d1eb6a822f8c146c6da6'
),
'sha1': hash_to_bytes(
'0e8a3ad980ec179856012b7eecf4327e99cd44cd'
),
'sha1_git': hash_to_bytes(
'1de565933b05f74c75ff9a6520af5f9f8a5a2f1d'
b"symlink": {
"data": b"target",
"blake2s256": hash_to_bytes(
"595d221b30fdd8e10e2fdf18376e688e"
"9f18d56fd9b6d1eb6a822f8c146c6da6"
),
'sha256': hash_to_bytes(
'34a04005bcaf206eec990bd9637d9fdb'
'6725e0a0c0d4aebf003f17f4c956eb5c'
"sha1": hash_to_bytes("0e8a3ad980ec179856012b7eecf4327e99cd44cd"),
"sha1_git": hash_to_bytes("1de565933b05f74c75ff9a6520af5f9f8a5a2f1d"),
"sha256": hash_to_bytes(
"34a04005bcaf206eec990bd9637d9fdb"
"6725e0a0c0d4aebf003f17f4c956eb5c"
),
'length': 6,
'perms': DentryPerms.symlink,
"length": 6,
"perms": DentryPerms.symlink,
}
}
self.specials = {
b'fifo': os.mkfifo,
b'devnull': lambda path: os.mknod(path, device=os.makedev(1, 3)),
b"fifo": os.mkfifo,
}
self.empty_content = {
'data': b'',
'length': 0,
'blake2s256': hash_to_bytes(
'69217a3079908094e11121d042354a7c'
'1f55b6482ca1a51e1b250dfd1ed0eef9'
"data": b"",
"length": 0,
"blake2s256": hash_to_bytes(
"69217a3079908094e11121d042354a7c" "1f55b6482ca1a51e1b250dfd1ed0eef9"
),
'sha1': hash_to_bytes(
'da39a3ee5e6b4b0d3255bfef95601890afd80709'
"sha1": hash_to_bytes("da39a3ee5e6b4b0d3255bfef95601890afd80709"),
"sha1_git": hash_to_bytes("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"),
"sha256": hash_to_bytes(
"e3b0c44298fc1c149afbf4c8996fb924" "27ae41e4649b934ca495991b7852b855"
),
'sha1_git': hash_to_bytes(
'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
),
'sha256': hash_to_bytes(
'e3b0c44298fc1c149afbf4c8996fb924'
'27ae41e4649b934ca495991b7852b855'
),
'perms': DentryPerms.content,
"perms": DentryPerms.content,
}
self.empty_directory = {
'id': hash_to_bytes(
'4b825dc642cb6eb9a060e54bf8d69288fbee4904'
),
'entries': [],
"id": hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
"entries": [],
}
# Generated with generate_testdata_from_disk
self.tarball_contents = {
b'': {
'entries': [{
'name': b'bar',
'perms': DentryPerms.directory,
'target': hash_to_bytes(
'3c1f578394f4623f74a0ba7fe761729f59fc6ec4'
),
'type': 'dir',
}, {
'name': b'empty-folder',
'perms': DentryPerms.directory,
'target': hash_to_bytes(
'4b825dc642cb6eb9a060e54bf8d69288fbee4904'
),
'type': 'dir',
}, {
'name': b'foo',
'perms': DentryPerms.directory,
'target': hash_to_bytes(
'2b41c40f0d1fbffcba12497db71fba83fcca96e5'
),
'type': 'dir',
}, {
'name': b'link-to-another-quote',
'perms': DentryPerms.symlink,
'target': hash_to_bytes(
'7d5c08111e21c8a9f71540939998551683375fad'
),
'type': 'file',
}, {
'name': b'link-to-binary',
'perms': DentryPerms.symlink,
'target': hash_to_bytes(
'e86b45e538d9b6888c969c89fbd22a85aa0e0366'
),
'type': 'file',
}, {
'name': b'link-to-foo',
'perms': DentryPerms.symlink,
'target': hash_to_bytes(
'19102815663d23f8b75a47e7a01965dcdc96468c'
),
'type': 'file',
}, {
'name': b'some-binary',
'perms': DentryPerms.executable_content,
'target': hash_to_bytes(
'68769579c3eaadbe555379b9c3538e6628bae1eb'
),
'type': 'file',
}],
'id': hash_to_bytes(
'e8b0f1466af8608c8a3fb9879db172b887e80759'
),
b"": {
"entries": [
{
"name": b"bar",
"perms": DentryPerms.directory,
"target": hash_to_bytes(
"3c1f578394f4623f74a0ba7fe761729f59fc6ec4"
),
"type": "dir",
},
{
"name": b"empty-folder",
"perms": DentryPerms.directory,
"target": hash_to_bytes(
"4b825dc642cb6eb9a060e54bf8d69288fbee4904"
),
"type": "dir",
},
{
"name": b"foo",
"perms": DentryPerms.directory,
"target": hash_to_bytes(
"2b41c40f0d1fbffcba12497db71fba83fcca96e5"
),
"type": "dir",
},
{
"name": b"link-to-another-quote",
"perms": DentryPerms.symlink,
"target": hash_to_bytes(
"7d5c08111e21c8a9f71540939998551683375fad"
),
"type": "file",
},
{
"name": b"link-to-binary",
"perms": DentryPerms.symlink,
"target": hash_to_bytes(
"e86b45e538d9b6888c969c89fbd22a85aa0e0366"
),
"type": "file",
},
{
"name": b"link-to-foo",
"perms": DentryPerms.symlink,
"target": hash_to_bytes(
"19102815663d23f8b75a47e7a01965dcdc96468c"
),
"type": "file",
},
{
"name": b"some-binary",
"perms": DentryPerms.executable_content,
"target": hash_to_bytes(
"68769579c3eaadbe555379b9c3538e6628bae1eb"
),
"type": "file",
},
],
"id": hash_to_bytes("e8b0f1466af8608c8a3fb9879db172b887e80759"),
},
b'bar': {
'entries': [{
'name': b'barfoo',
'perms': DentryPerms.directory,
'target': hash_to_bytes(
'c3020f6bf135a38c6df3afeb5fb38232c5e07087'
),
'type': 'dir',
}],
'id': hash_to_bytes(
'3c1f578394f4623f74a0ba7fe761729f59fc6ec4'
),
b"bar": {
"entries": [
{
"name": b"barfoo",
"perms": DentryPerms.directory,
"target": hash_to_bytes(
"c3020f6bf135a38c6df3afeb5fb38232c5e07087"
),
"type": "dir",
}
],
"id": hash_to_bytes("3c1f578394f4623f74a0ba7fe761729f59fc6ec4"),
},
b'bar/barfoo': {
'entries': [{
'name': b'another-quote.org',
'perms': DentryPerms.content,
'target': hash_to_bytes(
'133693b125bad2b4ac318535b84901ebb1f6b638'
),
'type': 'file',
}],
'id': hash_to_bytes(
'c3020f6bf135a38c6df3afeb5fb38232c5e07087'
),
b"bar/barfoo": {
"entries": [
{
"name": b"another-quote.org",
"perms": DentryPerms.content,
"target": hash_to_bytes(
"133693b125bad2b4ac318535b84901ebb1f6b638"
),
"type": "file",
}
],
"id": hash_to_bytes("c3020f6bf135a38c6df3afeb5fb38232c5e07087"),
},
b'bar/barfoo/another-quote.org': {
'blake2s256': hash_to_bytes(
'd26c1cad82d43df0bffa5e7be11a60e3'
'4adb85a218b433cbce5278b10b954fe8'
),
'length': 72,
'perms': DentryPerms.content,
'sha1': hash_to_bytes(
'90a6138ba59915261e179948386aa1cc2aa9220a'
),
'sha1_git': hash_to_bytes(
'133693b125bad2b4ac318535b84901ebb1f6b638'
b"bar/barfoo/another-quote.org": {
"blake2s256": hash_to_bytes(
"d26c1cad82d43df0bffa5e7be11a60e3"
"4adb85a218b433cbce5278b10b954fe8"
),
'sha256': hash_to_bytes(
'3db5ae168055bcd93a4d08285dc99ffe'
'e2883303b23fac5eab850273a8ea5546'
"length": 72,
"perms": DentryPerms.content,
"sha1": hash_to_bytes("90a6138ba59915261e179948386aa1cc2aa9220a"),
"sha1_git": hash_to_bytes("133693b125bad2b4ac318535b84901ebb1f6b638"),
"sha256": hash_to_bytes(
"3db5ae168055bcd93a4d08285dc99ffe"
"e2883303b23fac5eab850273a8ea5546"
),
},
b'empty-folder': {
'entries': [],
'id': hash_to_bytes(
'4b825dc642cb6eb9a060e54bf8d69288fbee4904'
),
b"empty-folder": {
"entries": [],
"id": hash_to_bytes("4b825dc642cb6eb9a060e54bf8d69288fbee4904"),
},
b'foo': {
'entries': [{
'name': b'barfoo',
'perms': DentryPerms.symlink,
'target': hash_to_bytes(
'8185dfb2c0c2c597d16f75a8a0c37668567c3d7e'
),
'type': 'file',
}, {
'name': b'quotes.md',
'perms': DentryPerms.content,
'target': hash_to_bytes(
'7c4c57ba9ff496ad179b8f65b1d286edbda34c9a'
),
'type': 'file',
}, {
'name': b'rel-link-to-barfoo',
'perms': DentryPerms.symlink,
'target': hash_to_bytes(
'acac326ddd63b0bc70840659d4ac43619484e69f'
),
'type': 'file',
}],
'id': hash_to_bytes(
'2b41c40f0d1fbffcba12497db71fba83fcca96e5'
),
b"foo": {
"entries": [
{
"name": b"barfoo",
"perms": DentryPerms.symlink,
"target": hash_to_bytes(
"8185dfb2c0c2c597d16f75a8a0c37668567c3d7e"
),
"type": "file",
},
{
"name": b"quotes.md",
"perms": DentryPerms.content,
"target": hash_to_bytes(
"7c4c57ba9ff496ad179b8f65b1d286edbda34c9a"
),
"type": "file",
},
{
"name": b"rel-link-to-barfoo",
"perms": DentryPerms.symlink,
"target": hash_to_bytes(
"acac326ddd63b0bc70840659d4ac43619484e69f"
),
"type": "file",
},
],
"id": hash_to_bytes("2b41c40f0d1fbffcba12497db71fba83fcca96e5"),
},
b'foo/barfoo': {
'blake2s256': hash_to_bytes(
'e1252f2caa4a72653c4efd9af871b62b'
'f2abb7bb2f1b0e95969204bd8a70d4cd'
),
'data': b'bar/barfoo',
'length': 10,
'perms': DentryPerms.symlink,
'sha1': hash_to_bytes(
'9057ee6d0162506e01c4d9d5459a7add1fedac37'
),
'sha1_git': hash_to_bytes(
'8185dfb2c0c2c597d16f75a8a0c37668567c3d7e'
b"foo/barfoo": {
"blake2s256": hash_to_bytes(
"e1252f2caa4a72653c4efd9af871b62b"
"f2abb7bb2f1b0e95969204bd8a70d4cd"
),
'sha256': hash_to_bytes(
'29ad3f5725321b940332c78e403601af'
'ff61daea85e9c80b4a7063b6887ead68'
"data": b"bar/barfoo",
"length": 10,
"perms": DentryPerms.symlink,
"sha1": hash_to_bytes("9057ee6d0162506e01c4d9d5459a7add1fedac37"),
"sha1_git": hash_to_bytes("8185dfb2c0c2c597d16f75a8a0c37668567c3d7e"),
"sha256": hash_to_bytes(
"29ad3f5725321b940332c78e403601af"
"ff61daea85e9c80b4a7063b6887ead68"
),
},
b'foo/quotes.md': {
'blake2s256': hash_to_bytes(
'bf7ce4fe304378651ee6348d3e9336ed'
'5ad603d33e83c83ba4e14b46f9b8a80b'
b"foo/quotes.md": {
"blake2s256": hash_to_bytes(
"bf7ce4fe304378651ee6348d3e9336ed"
"5ad603d33e83c83ba4e14b46f9b8a80b"
),
'length': 66,
'perms': DentryPerms.content,
'sha1': hash_to_bytes(
'1bf0bb721ac92c18a19b13c0eb3d741cbfadebfc'
),
'sha1_git': hash_to_bytes(
'7c4c57ba9ff496ad179b8f65b1d286edbda34c9a'
),
'sha256': hash_to_bytes(
'caca942aeda7b308859eb56f909ec96d'
'07a499491690c453f73b9800a93b1659'
"length": 66,
"perms": DentryPerms.content,
"sha1": hash_to_bytes("1bf0bb721ac92c18a19b13c0eb3d741cbfadebfc"),
"sha1_git": hash_to_bytes("7c4c57ba9ff496ad179b8f65b1d286edbda34c9a"),
"sha256": hash_to_bytes(
"caca942aeda7b308859eb56f909ec96d"
"07a499491690c453f73b9800a93b1659"
),
},
b'foo/rel-link-to-barfoo': {
'blake2s256': hash_to_bytes(
'd9c327421588a1cf61f316615005a2e9'
'c13ac3a4e96d43a24138d718fa0e30db'
),
'data': b'../bar/barfoo',
'length': 13,
'perms': DentryPerms.symlink,
'sha1': hash_to_bytes(
'dc51221d308f3aeb2754db48391b85687c2869f4'
),
'sha1_git': hash_to_bytes(
'acac326ddd63b0bc70840659d4ac43619484e69f'
b"foo/rel-link-to-barfoo": {
"blake2s256": hash_to_bytes(
"d9c327421588a1cf61f316615005a2e9"
"c13ac3a4e96d43a24138d718fa0e30db"
),
'sha256': hash_to_bytes(
'8007d20db2af40435f42ddef4b8ad76b'
'80adbec26b249fdf0473353f8d99df08'
"data": b"../bar/barfoo",
"length": 13,
"perms": DentryPerms.symlink,
"sha1": hash_to_bytes("dc51221d308f3aeb2754db48391b85687c2869f4"),
"sha1_git": hash_to_bytes("acac326ddd63b0bc70840659d4ac43619484e69f"),
"sha256": hash_to_bytes(
"8007d20db2af40435f42ddef4b8ad76b"
"80adbec26b249fdf0473353f8d99df08"
),
},
b'link-to-another-quote': {
'blake2s256': hash_to_bytes(
'2d0e73cea01ba949c1022dc10c8a43e6'
'6180639662e5dc2737b843382f7b1910'
b"link-to-another-quote": {
"blake2s256": hash_to_bytes(
"2d0e73cea01ba949c1022dc10c8a43e6"
"6180639662e5dc2737b843382f7b1910"
),
'data': b'bar/barfoo/another-quote.org',
'length': 28,
'perms': DentryPerms.symlink,
'sha1': hash_to_bytes(
'cbeed15e79599c90de7383f420fed7acb48ea171'
),
'sha1_git': hash_to_bytes(
'7d5c08111e21c8a9f71540939998551683375fad'
),
'sha256': hash_to_bytes(
'e6e17d0793aa750a0440eb9ad5b80b25'
'8076637ef0fb68f3ac2e59e4b9ac3ba6'
"data": b"bar/barfoo/another-quote.org",
"length": 28,
"perms": DentryPerms.symlink,
"sha1": hash_to_bytes("cbeed15e79599c90de7383f420fed7acb48ea171"),
"sha1_git": hash_to_bytes("7d5c08111e21c8a9f71540939998551683375fad"),
"sha256": hash_to_bytes(
"e6e17d0793aa750a0440eb9ad5b80b25"
"8076637ef0fb68f3ac2e59e4b9ac3ba6"
),
},
b'link-to-binary': {
'blake2s256': hash_to_bytes(
'9ce18b1adecb33f891ca36664da676e1'
'2c772cc193778aac9a137b8dc5834b9b'
),
'data': b'some-binary',
'length': 11,
'perms': DentryPerms.symlink,
'sha1': hash_to_bytes(
'd0248714948b3a48a25438232a6f99f0318f59f1'
),
'sha1_git': hash_to_bytes(
'e86b45e538d9b6888c969c89fbd22a85aa0e0366'
b"link-to-binary": {
"blake2s256": hash_to_bytes(
"9ce18b1adecb33f891ca36664da676e1"
"2c772cc193778aac9a137b8dc5834b9b"
),
'sha256': hash_to_bytes(
'14126e97d83f7d261c5a6889cee73619'
'770ff09e40c5498685aba745be882eff'
"data": b"some-binary",
"length": 11,
"perms": DentryPerms.symlink,
"sha1": hash_to_bytes("d0248714948b3a48a25438232a6f99f0318f59f1"),
"sha1_git": hash_to_bytes("e86b45e538d9b6888c969c89fbd22a85aa0e0366"),
"sha256": hash_to_bytes(
"14126e97d83f7d261c5a6889cee73619"
"770ff09e40c5498685aba745be882eff"
),
},
b'link-to-foo': {
'blake2s256': hash_to_bytes(
'08d6cad88075de8f192db097573d0e82'
'9411cd91eb6ec65e8fc16c017edfdb74'
b"link-to-foo": {
"blake2s256": hash_to_bytes(
"08d6cad88075de8f192db097573d0e82"
"9411cd91eb6ec65e8fc16c017edfdb74"
),
'data': b'foo',
'length': 3,
'perms': DentryPerms.symlink,
'sha1': hash_to_bytes(
'0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
),
'sha1_git': hash_to_bytes(
'19102815663d23f8b75a47e7a01965dcdc96468c'
),
'sha256': hash_to_bytes(
'2c26b46b68ffc68ff99b453c1d304134'
'13422d706483bfa0f98a5e886266e7ae'
"data": b"foo",
"length": 3,
"perms": DentryPerms.symlink,
"sha1": hash_to_bytes("0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"),
"sha1_git": hash_to_bytes("19102815663d23f8b75a47e7a01965dcdc96468c"),
"sha256": hash_to_bytes(
"2c26b46b68ffc68ff99b453c1d304134"
"13422d706483bfa0f98a5e886266e7ae"
),
},
b'some-binary': {
'blake2s256': hash_to_bytes(
'922e0f7015035212495b090c27577357'
'a740ddd77b0b9e0cd23b5480c07a18c6'
),
'length': 5,
'perms': DentryPerms.executable_content,
'sha1': hash_to_bytes(
'0bbc12d7f4a2a15b143da84617d95cb223c9b23c'
),
'sha1_git': hash_to_bytes(
'68769579c3eaadbe555379b9c3538e6628bae1eb'
b"some-binary": {
"blake2s256": hash_to_bytes(
"922e0f7015035212495b090c27577357"
"a740ddd77b0b9e0cd23b5480c07a18c6"
),
'sha256': hash_to_bytes(
'bac650d34a7638bb0aeb5342646d24e3'
'b9ad6b44c9b383621faa482b990a367d'
"length": 5,
"perms": DentryPerms.executable_content,
"sha1": hash_to_bytes("0bbc12d7f4a2a15b143da84617d95cb223c9b23c"),
"sha1_git": hash_to_bytes("68769579c3eaadbe555379b9c3538e6628bae1eb"),
"sha256": hash_to_bytes(
"bac650d34a7638bb0aeb5342646d24e3"
"b9ad6b44c9b383621faa482b990a367d"
),
},
}
@@ -401,28 +570,27 @@ class DataMixin:
def tearDown(self):
self.tmpdir.cleanup()
def assertContentEqual(self, left, right, *, check_data=False, # noqa
check_path=False):
def assertContentEqual(self, left, right, *, check_path=False): # noqa
if not isinstance(left, Content):
raise ValueError('%s is not a Content' % left)
raise ValueError("%s is not a Content" % left)
if isinstance(right, Content):
right = right.get_data()
# Compare dictionaries
keys = DEFAULT_ALGORITHMS | {
'length',
'perms',
"length",
"perms",
}
if check_data:
keys |= {'data'}
if check_path:
keys |= {'path'}
keys |= {"path"}
failed = []
for key in keys:
try:
lvalue = left.data[key]
if key == 'perms' and 'perms' not in right:
rvalue = from_disk.mode_to_perms(right['mode'])
if key == "perms" and "perms" not in right:
rvalue = from_disk.mode_to_perms(right["mode"])
else:
rvalue = right[key]
except KeyError:
@@ -434,33 +602,35 @@ class DataMixin:
if failed:
raise self.failureException(
'Content mismatched:\n' +
'\n'.join(
'content[%s] = %r != %r' % (
key, left.data.get(key), right.get(key))
"Content mismatched:\n"
+ "\n".join(
"content[%s] = %r != %r" % (key, left.data.get(key), right.get(key))
for key in failed
)
)
def assertDirectoryEqual(self, left, right): # NoQA
if not isinstance(left, Directory):
raise ValueError('%s is not a Directory' % left)
raise ValueError("%s is not a Directory" % left)
if isinstance(right, Directory):
right = right.get_data()
return self.assertCountEqual(left.entries, right['entries'])
assert left.entries == right["entries"]
assert left.hash == right["id"]
assert left.to_model() == model.Directory.from_dict(right)
def make_contents(self, directory):
for filename, content in self.contents.items():
path = os.path.join(directory, filename)
with open(path, 'wb') as f:
f.write(content['data'])
os.chmod(path, content['mode'])
with open(path, "wb") as f:
f.write(content["data"])
os.chmod(path, content["mode"])
def make_symlinks(self, directory):
for filename, symlink in self.symlinks.items():
path = os.path.join(directory, filename)
os.symlink(symlink['data'], path)
os.symlink(symlink["data"], path)
def make_specials(self, directory):
for filename, fn in self.specials.items():
@@ -468,9 +638,9 @@ class DataMixin:
fn(path)
def make_from_tarball(self, directory):
tarball = os.path.join(TEST_DATA, 'dir-folders', 'sample-folder.tgz')
tarball = os.path.join(TEST_DATA, "dir-folders", "sample-folder.tgz")
with tarfile.open(tarball, 'r:gz') as f:
with tarfile.open(tarball, "r:gz") as f:
f.extractall(os.fsdecode(directory))
@@ -480,11 +650,28 @@ class TestContent(DataMixin, unittest.TestCase):
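# Content.from_bytes() computes the default checksums for the given bytes;
# swhid() then builds the "swh:1:cnt:<sha1_git>" identifier from the sha1_git
# checksum, which is what the next two tests verify.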
def test_data_to_content(self):
for filename, content in self.contents.items():
conv_content = Content.from_bytes(mode=content['mode'],
data=content['data'])
conv_content = Content.from_bytes(
mode=content["mode"], data=content["data"]
)
self.assertContentEqual(conv_content, content)
self.assertIn(hash_to_hex(conv_content.hash), repr(conv_content))
def test_content_swhid(self):
for _, content in self.contents.items():
content_res = Content.from_bytes(mode=content["mode"], data=content["data"])
content_swhid = "swh:1:cnt:" + hash_to_hex(content["sha1_git"])
assert str(content_res.swhid()) == content_swhid
class TestDirectory(DataMixin, unittest.TestCase):
def setUp(self):
super().setUp()
def test_directory_swhid(self):
directory_swhid = "swh:1:dir:" + hash_to_hex(self.empty_directory["id"])
directory = Directory.from_disk(path=self.tmpdir_name)
assert str(directory.swhid()) == directory_swhid
class SymlinkToContent(DataMixin, unittest.TestCase):
def setUp(self):
@@ -496,7 +683,21 @@ class SymlinkToContent(DataMixin, unittest.TestCase):
path = os.path.join(self.tmpdir_name, filename)
perms = 0o120000
conv_content = Content.from_symlink(path=path, mode=perms)
self.assertContentEqual(conv_content, symlink)
symlink_copy = symlink.copy()
symlink_copy["path"] = path
self.assertContentEqual(conv_content, symlink_copy, check_path=True)
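# to_model() converts a from_disk Content into a swh.model.model.Content:
# the disk-only keys (perms, path, mode) are dropped and the status defaults
# to "visible", as the comparison below shows.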
def test_symlink_to_base_model(self):
for filename, symlink in self.symlinks.items():
path = os.path.join(self.tmpdir_name, filename)
perms = 0o120000
model_content = Content.from_symlink(path=path, mode=perms).to_model()
right = symlink.copy()
for key in ("perms", "path", "mode"):
right.pop(key, None)
right["status"] = "visible"
assert model_content == model.Content.from_dict(right)
class FileToContent(DataMixin, unittest.TestCase):
@@ -506,186 +707,410 @@ class FileToContent(DataMixin, unittest.TestCase):
self.make_symlinks(self.tmpdir_name)
self.make_specials(self.tmpdir_name)
def test_symlink_to_content(self):
for filename, symlink in self.symlinks.items():
path = os.path.join(self.tmpdir_name, filename)
conv_content = Content.from_file(path=path)
self.assertContentEqual(conv_content, symlink)
def test_file_to_content(self):
# Check whether loading the data works
for data in [True, False]:
for filename, content in self.contents.items():
path = os.path.join(self.tmpdir_name, filename)
conv_content = Content.from_file(path=path)
self.assertContentEqual(conv_content, content)
def test_special_to_content(self):
for filename in self.specials:
path = os.path.join(self.tmpdir_name, filename)
conv_content = Content.from_file(path=path)
self.assertContentEqual(conv_content, self.empty_content)
for path in ["/dev/null", "/dev/zero"]:
path = os.path.join(self.tmpdir_name, filename)
conv_content = Content.from_file(path=path)
self.assertContentEqual(conv_content, self.empty_content)
def test_symlink_to_content_model(self):
for filename, symlink in self.symlinks.items():
path = os.path.join(self.tmpdir_name, filename)
model_content = Content.from_file(path=path).to_model()
right = symlink.copy()
for key in ("perms", "path", "mode"):
right.pop(key, None)
right["status"] = "visible"
assert model_content == model.Content.from_dict(right)
def test_file_to_content_model(self):
for filename, content in self.contents.items():
path = os.path.join(self.tmpdir_name, filename)
model_content = Content.from_file(path=path).to_model()
right = content.copy()
for key in ("perms", "mode"):
right.pop(key, None)
assert model_content.with_data() == model.Content.from_dict(right)
right["get_data"] = DiskBackedData(path=path)
del right["data"]
assert model_content == model.Content.from_dict(right)
def test_special_to_content_model(self):
for filename in self.specials:
path = os.path.join(self.tmpdir_name, filename)
model_content = Content.from_file(path=path).to_model()
right = self.empty_content.copy()
for key in ("perms", "path", "mode"):
right.pop(key, None)
right["status"] = "visible"
assert model_content == model.Content.from_dict(right)
for path in ["/dev/null", "/dev/zero"]:
model_content = Content.from_file(path=path).to_model()
right = self.empty_content.copy()
for key in ("perms", "path", "mode"):
right.pop(key, None)
right["status"] = "visible"
assert model_content == model.Content.from_dict(right)
def test_symlink_max_length(self):
for max_content_length in [4, 10]:
for filename, symlink in self.symlinks.items():
path = os.path.join(self.tmpdir_name, filename)
conv_content = Content.from_file(path=path, data=data)
self.assertContentEqual(conv_content, symlink, check_data=data)
content = Content.from_file(path=path)
if content.data["length"] > max_content_length:
with pytest.raises(Exception, match="too large"):
Content.from_file(
path=path, max_content_length=max_content_length
)
else:
limited_content = Content.from_file(
path=path, max_content_length=max_content_length
)
assert content == limited_content
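# Regular files behave differently from symlinks when max_content_length is
# exceeded: instead of raising, the content is kept with status "absent" and
# reason "Content too large", while its length is still recorded (next test).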
def test_file_max_length(self):
for max_content_length in [2, 4]:
for filename, content in self.contents.items():
path = os.path.join(self.tmpdir_name, filename)
conv_content = Content.from_file(path=path, data=data)
self.assertContentEqual(conv_content, content, check_data=data)
content = Content.from_file(path=path)
limited_content = Content.from_file(
path=path, max_content_length=max_content_length
)
assert content.data["length"] == limited_content.data["length"]
assert content.data["status"] == "visible"
if content.data["length"] > max_content_length:
assert limited_content.data["status"] == "absent"
assert limited_content.data["reason"] == "Content too large"
else:
assert limited_content.data["status"] == "visible"
def test_special_file_max_length(self):
for max_content_length in [None, 0, 1]:
for filename in self.specials:
path = os.path.join(self.tmpdir_name, filename)
conv_content = Content.from_file(path=path, data=data)
self.assertContentEqual(conv_content, self.empty_content)
content = Content.from_file(path=path)
limited_content = Content.from_file(
path=path, max_content_length=max_content_length
)
assert limited_content == content
def test_file_to_content_with_path(self):
for filename, content in self.contents.items():
content_w_path = content.copy()
path = os.path.join(self.tmpdir_name, filename)
content_w_path['path'] = path
conv_content = Content.from_file(path=path, save_path=True)
self.assertContentEqual(conv_content, content_w_path,
check_path=True)
content_w_path["path"] = path
conv_content = Content.from_file(path=path)
self.assertContentEqual(conv_content, content_w_path, check_path=True)
class DirectoryToObjects(DataMixin, unittest.TestCase):
def setUp(self):
super().setUp()
contents = os.path.join(self.tmpdir_name, b'contents')
contents = os.path.join(self.tmpdir_name, b"contents")
os.mkdir(contents)
self.make_contents(contents)
symlinks = os.path.join(self.tmpdir_name, b'symlinks')
symlinks = os.path.join(self.tmpdir_name, b"symlinks")
os.mkdir(symlinks)
self.make_symlinks(symlinks)
specials = os.path.join(self.tmpdir_name, b'specials')
specials = os.path.join(self.tmpdir_name, b"specials")
os.mkdir(specials)
self.make_specials(specials)
empties = os.path.join(self.tmpdir_name, b'empty1', b'empty2')
empties = os.path.join(self.tmpdir_name, b"empty1", b"empty2")
os.makedirs(empties)
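# The fixture laid out above contains contents/, symlinks/, specials/ and the
# nested empty directories empty1/empty2; check_collect() counts the Content
# and Directory objects that Directory.collect() yields for it.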
def check_collect(
self, directory, expected_directory_count, expected_content_count
):
objs = directory.collect()
contents = []
directories = []
for obj in objs:
if isinstance(obj, Content):
contents.append(obj)
elif isinstance(obj, Directory):
directories.append(obj)
self.assertEqual(len(directories), expected_directory_count)
self.assertEqual(len(contents), expected_content_count)
def test_directory_to_objects(self):
directory = Directory.from_disk(path=self.tmpdir_name)
for name, value in self.contents.items():
self.assertContentEqual(directory[b'contents/' + name], value)
self.assertContentEqual(directory[b"contents/" + name], value)
for name, value in self.symlinks.items():
self.assertContentEqual(directory[b'symlinks/' + name], value)
self.assertContentEqual(directory[b"symlinks/" + name], value)
for name in self.specials:
self.assertContentEqual(
directory[b'specials/' + name],
directory[b"specials/" + name],
self.empty_content,
)
self.assertEqual(
directory[b'empty1/empty2'].get_data(),
directory[b"empty1/empty2"].get_data(),
self.empty_directory,
)
# Raise on a non-existent file
with self.assertRaisesRegex(KeyError, "b'nonexistent'"):
directory[b'empty1/nonexistent']
directory[b"empty1/nonexistent"]
# Raise on a non-existent directory
with self.assertRaisesRegex(KeyError, "b'nonexistentdir'"):
directory[b'nonexistentdir/file']
directory[b"nonexistentdir/file"]
objs = directory.collect()
self.assertCountEqual(['content', 'directory'], objs)
self.assertEqual(len(objs['directory']), 6)
self.assertEqual(len(objs['content']),
len(self.contents)
+ len(self.symlinks)
+ 1)
self.check_collect(
directory,
expected_directory_count=6,
expected_content_count=len(self.contents) + len(self.symlinks) + 1,
)
def test_directory_to_objects_ignore_empty(self):
directory = Directory.from_disk(
path=self.tmpdir_name,
dir_filter=from_disk.ignore_empty_directories
path=self.tmpdir_name, path_filter=from_disk.ignore_empty_directories
)
for name, value in self.contents.items():
self.assertContentEqual(directory[b'contents/' + name], value)
self.assertContentEqual(directory[b"contents/" + name], value)
for name, value in self.symlinks.items():
self.assertContentEqual(directory[b'symlinks/' + name], value)
self.assertContentEqual(directory[b"symlinks/" + name], value)
for name in self.specials:
self.assertContentEqual(
directory[b'specials/' + name],
directory[b"specials/" + name],
self.empty_content,
)
# empty directories have been ignored recursively
with self.assertRaisesRegex(KeyError, "b'empty1'"):
directory[b'empty1']
directory[b"empty1"]
with self.assertRaisesRegex(KeyError, "b'empty1'"):
directory[b'empty1/empty2']
objs = directory.collect()
self.assertCountEqual(['content', 'directory'], objs)
directory[b"empty1/empty2"]
self.assertEqual(len(objs['directory']), 4)
self.assertEqual(len(objs['content']),
len(self.contents)
+ len(self.symlinks)
+ 1)
self.check_collect(
directory,
expected_directory_count=4,
expected_content_count=len(self.contents) + len(self.symlinks) + 1,
)
def test_directory_to_objects_ignore_name(self):
pfilter = from_disk.ignore_named_directories([b"symlinks"])
directory = Directory.from_disk(
path=self.tmpdir_name,
dir_filter=from_disk.ignore_named_directories([b'symlinks'])
path_filter=pfilter,
)
for name, value in self.contents.items():
self.assertContentEqual(directory[b'contents/' + name], value)
self.assertContentEqual(directory[b"contents/" + name], value)
for name in self.specials:
self.assertContentEqual(
directory[b'specials/' + name],
directory[b"specials/" + name],
self.empty_content,
)
self.assertEqual(
directory[b'empty1/empty2'].get_data(),
directory[b"empty1/empty2"].get_data(),
self.empty_directory,
)
with self.assertRaisesRegex(KeyError, "b'symlinks'"):
directory[b'symlinks']
objs = directory.collect()
directory[b"symlinks"]
self.assertCountEqual(['content', 'directory'], objs)
self.check_collect(
directory,
expected_directory_count=5,
expected_content_count=len(self.contents) + 1,
)
self.assertEqual(len(objs['directory']), 5)
self.assertEqual(len(objs['content']),
len(self.contents)
+ 1)
def test_directory_to_objects_ignore_name_with_slash(self):
self.tmpdir_name = self.tmpdir_name + b"/"
self.test_directory_to_objects_ignore_name()
def test_directory_to_objects_ignore_name_case(self):
directory = Directory.from_disk(
path=self.tmpdir_name,
dir_filter=from_disk.ignore_named_directories([b'symLiNks'],
case_sensitive=False)
path_filter=from_disk.ignore_named_directories(
[b"symLiNks"], case_sensitive=False
),
)
for name, value in self.contents.items():
self.assertContentEqual(directory[b'contents/' + name], value)
self.assertContentEqual(directory[b"contents/" + name], value)
for name in self.specials:
self.assertContentEqual(
directory[b'specials/' + name],
directory[b"specials/" + name],
self.empty_content,
)
self.assertEqual(
directory[b'empty1/empty2'].get_data(),
directory[b"empty1/empty2"].get_data(),
self.empty_directory,
)
with self.assertRaisesRegex(KeyError, "b'symlinks'"):
directory[b'symlinks']
directory[b"symlinks"]
objs = directory.collect()
self.check_collect(
directory,
expected_directory_count=5,
expected_content_count=len(self.contents) + 1,
)
def test_directory_entry_order(self):
with tempfile.TemporaryDirectory() as dirname:
dirname = os.fsencode(dirname)
mk_tree(
dirname,
b"""
/foo.
/foo0
/foo/
""",
)
directory = Directory.from_disk(path=dirname)
assert [entry["name"] for entry in directory.entries] == [
b"foo.",
b"foo",
b"foo0",
]
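# The expected order matches git tree sorting, where a directory name is
# compared as if it ended with "/": b"foo." < b"foo/" < b"foo0".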
def test_directory_path_filter(self):
def filter_func(path, name, entries):
return name.startswith(b"foo")
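# As the assertions below show, path_filter only prunes directories: b"baz"
# is dropped because its name does not start with b"foo", while regular files
# (b"file", b"foofile") are always kept.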
with tempfile.TemporaryDirectory() as dirname:
dirname = os.fsencode(dirname)
mk_tree(
dirname,
b"""
/foofile
/file
/foo/foo/
/baz/
""",
)
# No filters
directory = Directory.from_disk(path=dirname)
assert [entry["name"] for entry in directory.entries] == [
b"baz",
b"file",
b"foo",
b"foofile",
]
# Filter paths
directory = Directory.from_disk(path=dirname, path_filter=filter_func)
assert [entry["name"] for entry in directory.entries] == [
b"foo",
b"foofile",
]
def test_directory_progress_callback(self):
total = []
def update_info(arg):
assert type(arg) is int
total.append(arg)
Directory.from_disk(path=self.tmpdir_name, progress_callback=update_info)
# Corresponds to the deeper files and directories plus the four top-level ones
assert total == [4, 1, 1, 1, 1]
def test_exclude_trailing(self):
self.test_exclude(trailing_slash=True)
def test_exclude(self, trailing_slash=False):
"""exclude patterns"""
with tempfile.TemporaryDirectory() as dirname:
dirname = os.fsencode(dirname)
mk_tree(
dirname,
b"""
/foofile
/file
/foo/foo/
/baz/
/excluded_dir/file
/excluded_dir\x96/file
/excluded_dir2/
/excluded_dir2\x96/
/foo/excluded_dir/
/foo/excluded_dir2\x96/
""",
)
self.assertCountEqual(['content', 'directory'], objs)
# no filter
dir_path = dirname
if trailing_slash:
dir_path += b"/"
directory = Directory.from_disk(path=dir_path)
assert set(directory.keys()) == {
b"baz",
b"foo",
b"excluded_dir2\x96",
b"excluded_dir",
b"excluded_dir\x96",
b"excluded_dir2",
b"foofile",
b"file",
}
assert set(directory[b"foo"].keys()) == {
b"foo",
b"excluded_dir2\x96",
b"excluded_dir",
}
assert (
str(directory.swhid())
== "swh:1:dir:cd4dfab9b3e160a683f036841e03855929a07286"
)
self.assertEqual(len(objs['directory']), 5)
self.assertEqual(len(objs['content']),
len(self.contents)
+ 1)
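# ignore_directories_patterns() turns the exclude patterns into a path_filter;
# top-level directories matching b"excluded_*" are then dropped from the
# resulting Directory, which also changes its swhid (checked below).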
from swh.model.from_disk import ignore_directories_patterns
exclude_patterns = [b"excluded_*"]
path_filter = ignore_directories_patterns(dirname, exclude_patterns)
directory_f = Directory.from_disk(path=dir_path, path_filter=path_filter)
assert set(directory_f.keys()) == {b"baz", b"foo", b"foofile", b"file"}
# XXX should foo/excluded_dir and foo/excluded_dir2 be excluded as
# well? Currently they are not
assert set(directory_f[b"foo"].keys()) == {
b"foo",
b"excluded_dir2\x96",
b"excluded_dir",
}
assert (
str(directory_f.swhid())
== "swh:1:dir:adaeb949e1f09d28d334b7e360691ef9df934703"
)
@pytest.mark.fs
class TarballTest(DataMixin, unittest.TestCase):
def setUp(self):
super().setUp()
@@ -693,57 +1118,83 @@ class TarballTest(DataMixin, unittest.TestCase):
def test_contents_match(self):
directory = Directory.from_disk(
path=os.path.join(self.tmpdir_name, b'sample-folder')
path=os.path.join(self.tmpdir_name, b"sample-folder")
)
for name, data in self.tarball_contents.items():
for name, expected in self.tarball_contents.items():
obj = directory[name]
if isinstance(obj, Content):
self.assertContentEqual(obj, data)
self.assertContentEqual(obj, expected)
elif isinstance(obj, Directory):
self.assertDirectoryEqual(obj, data)
self.assertDirectoryEqual(obj, expected)
else:
raise self.failureException('Unknown type for %s' % obj)
raise self.failureException("Unknown type for %s" % obj)
class TarballIterDirectory(DataMixin, unittest.TestCase):
def setUp(self):
super().setUp()
self.make_from_tarball(self.tmpdir_name)
def test_iter_directory(self):
"""Iter from_disk.directory should yield the full arborescence tree"""
directory = Directory.from_disk(
path=os.path.join(self.tmpdir_name, b"sample-folder")
)
contents, skipped_contents, directories = from_disk.iter_directory(directory)
expected_nb = defaultdict(int)
for name in self.tarball_contents.keys():
obj = directory[name]
expected_nb[obj.object_type] += 1
assert len(contents) == expected_nb[FromDiskType.CONTENT] and len(contents) > 0
assert len(skipped_contents) == 0
assert (
len(directories) == expected_nb[FromDiskType.DIRECTORY]
and len(directories) > 0
)
class DirectoryManipulation(DataMixin, unittest.TestCase):
def test_directory_access_nested(self):
d = Directory()
d[b'a'] = Directory()
d[b'a/b'] = Directory()
d[b"a"] = Directory()
d[b"a/b"] = Directory()
self.assertEqual(d[b'a/b'].get_data(), self.empty_directory)
self.assertEqual(d[b"a/b"].get_data(), self.empty_directory)
def test_directory_del_nested(self):
d = Directory()
d[b'a'] = Directory()
d[b'a/b'] = Directory()
d[b"a"] = Directory()
d[b"a/b"] = Directory()
with self.assertRaisesRegex(KeyError, "b'c'"):
del d[b'a/b/c']
del d[b"a/b/c"]
with self.assertRaisesRegex(KeyError, "b'level2'"):
del d[b'a/level2/c']
del d[b"a/level2/c"]
del d[b'a/b']
del d[b"a/b"]
self.assertEqual(d[b'a'].get_data(), self.empty_directory)
self.assertEqual(d[b"a"].get_data(), self.empty_directory)
def test_directory_access_self(self):
d = Directory()
self.assertIs(d, d[b''])
self.assertIs(d, d[b'/'])
self.assertIs(d, d[b'//'])
self.assertIs(d, d[b""])
self.assertIs(d, d[b"/"])
self.assertIs(d, d[b"//"])
def test_directory_access_wrong_type(self):
d = Directory()
with self.assertRaisesRegex(ValueError, 'bytes from Directory'):
d['foo']
with self.assertRaisesRegex(ValueError, 'bytes from Directory'):
with self.assertRaisesRegex(ValueError, "bytes from Directory"):
d["foo"]
with self.assertRaisesRegex(ValueError, "bytes from Directory"):
d[42]
def test_directory_repr(self):
entries = [b'a', b'b', b'c']
entries = [b"a", b"b", b"c"]
d = Directory()
for entry in entries:
d[entry] = Directory()
@@ -756,32 +1207,48 @@ class DirectoryManipulation(DataMixin, unittest.TestCase):
def test_directory_set_wrong_type_name(self):
d = Directory()
with self.assertRaisesRegex(ValueError, 'bytes Directory entry'):
d['foo'] = Directory()
with self.assertRaisesRegex(ValueError, 'bytes Directory entry'):
with self.assertRaisesRegex(ValueError, "bytes Directory entry"):
d["foo"] = Directory()
with self.assertRaisesRegex(ValueError, "bytes Directory entry"):
d[42] = Directory()
def test_directory_set_nul_in_name(self):
d = Directory()
with self.assertRaisesRegex(ValueError, 'nul bytes'):
d[b'\x00\x01'] = Directory()
with self.assertRaisesRegex(ValueError, "nul bytes"):
d[b"\x00\x01"] = Directory()
def test_directory_set_empty_name(self):
d = Directory()
with self.assertRaisesRegex(ValueError, 'must have a name'):
d[b''] = Directory()
with self.assertRaisesRegex(ValueError, 'must have a name'):
d[b'/'] = Directory()
with self.assertRaisesRegex(ValueError, "must have a name"):
d[b""] = Directory()
with self.assertRaisesRegex(ValueError, "must have a name"):
d[b"/"] = Directory()
def test_directory_set_wrong_type(self):
d = Directory()
with self.assertRaisesRegex(ValueError, 'Content or Directory'):
d[b'entry'] = object()
with self.assertRaisesRegex(ValueError, "Content or Directory"):
d[b"entry"] = object()
def test_directory_del_wrong_type(self):
d = Directory()
with self.assertRaisesRegex(ValueError, 'bytes Directory entry'):
del d['foo']
with self.assertRaisesRegex(ValueError, 'bytes Directory entry'):
with self.assertRaisesRegex(ValueError, "bytes Directory entry"):
del d["foo"]
with self.assertRaisesRegex(ValueError, "bytes Directory entry"):
del d[42]
def test_directory_contains(self):
d = Directory()
d[b"a"] = Directory()
d[b"a/b"] = Directory()
d[b"a/b/c"] = Directory()
d[b"a/b/c/d"] = Content()
self.assertIn(b"a", d)
self.assertIn(b"a/b", d)
self.assertIn(b"a/b/c", d)
self.assertIn(b"a/b/c/d", d)
self.assertNotIn(b"b", d)
self.assertNotIn(b"b/c", d)
self.assertNotIn(b"b/c/d", d)
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.model.model import BaseContent, Origin
from .generate_testdata import ORIGINS, gen_contents, gen_origins
def test_gen_origins_empty():
origins = gen_origins(0)
assert not origins
def test_gen_origins_one():
origins = gen_origins(1)
assert len(origins) == 1
assert [Origin.from_dict(d) for d in origins]
def test_gen_origins_default():
origins = gen_origins()
assert len(origins) == 100
models = [Origin.from_dict(d).url for d in origins]
assert len(origins) == len(set(models))
def test_gen_origins_max():
nmax = len(ORIGINS)
origins = gen_origins(nmax + 1)
assert len(origins) == nmax
models = {Origin.from_dict(d).url for d in origins}
# ensure we did not generate the same origin twice
assert len(origins) == len(models)
def test_gen_contents_empty():
contents = gen_contents(0)
assert not contents
def test_gen_contents_one():
contents = gen_contents(1)
assert len(contents) == 1
assert [BaseContent.from_dict(d) for d in contents]
def test_gen_contents_default():
contents = gen_contents()
assert len(contents) == 20
models = {BaseContent.from_dict(d) for d in contents}
# ensure we did not generate the same content twice
assert len(contents) == len(models)
# Copyright (C) 2015-2018 The Software Heritage developers
# Copyright (C) 2015-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import contextlib
import hashlib
import io
import os
import tempfile
import unittest
from unittest.mock import patch
import pytest
from swh.model import hashutil
from swh.model.hashutil import MultiHash
class BaseHashutil(unittest.TestCase):
def setUp(self):
# Reset function cache
hashutil._blake2_hash_cache = {}
self.data = b'1984\n'
self.hex_checksums = {
'sha1': '62be35bf00ff0c624f4a621e2ea5595a049e0731',
'sha1_git': '568aaf43d83b2c3df8067f3bedbb97d83260be6d',
'sha256': '26602113b4b9afd9d55466b08580d3c2'
'4a9b50ee5b5866c0d91fab0e65907311',
'blake2s256': '63cfb259e1fdb485bc5c55749697a6b21ef31fb7445f6c78a'
'c9422f9f2dc8906',
from swh.model.hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytehex
@contextlib.contextmanager
def patch_blake2(function_name):
try:
with patch(function_name) as mock:
yield mock
finally:
# mocking blake2 inserts mock objects in the cache; we need
# to clean it before the next test runs
hashutil._blake2_hash_cache.clear()
@pytest.fixture(autouse=True)
def blake2_hash_cache_reset():
# Reset function cache
hashutil._blake2_hash_cache = {}
@pytest.fixture
def hash_test_data():
class HashTestData:
data = b"1984\n"
hex_checksums = {
"sha1": "62be35bf00ff0c624f4a621e2ea5595a049e0731",
"sha1_git": "568aaf43d83b2c3df8067f3bedbb97d83260be6d",
"sha256": "26602113b4b9afd9d55466b08580d3c2"
"4a9b50ee5b5866c0d91fab0e65907311",
"blake2s256": "63cfb259e1fdb485bc5c55749697a6b21ef31fb7445f6c78a"
"c9422f9f2dc8906",
}
self.checksums = {
type: bytes.fromhex(cksum)
for type, cksum in self.hex_checksums.items()
checksums = {
type: bytes.fromhex(cksum) for type, cksum in hex_checksums.items()
}
self.bytehex_checksums = {
type: hashutil.hash_to_bytehex(cksum)
for type, cksum in self.checksums.items()
bytehex_checksums = {
type: hashutil.hash_to_bytehex(cksum) for type, cksum in checksums.items()
}
self.git_hex_checksums = {
'blob': self.hex_checksums['sha1_git'],
'tree': '5b2e883aa33d2efab98442693ea4dd5f1b8871b0',
'commit': '79e4093542e72f0fcb7cbd75cb7d270f9254aa8f',
'tag': 'd6bf62466f287b4d986c545890716ce058bddf67',
git_hex_checksums = {
"blob": hex_checksums["sha1_git"],
"tree": "5b2e883aa33d2efab98442693ea4dd5f1b8871b0",
"commit": "79e4093542e72f0fcb7cbd75cb7d270f9254aa8f",
"tag": "d6bf62466f287b4d986c545890716ce058bddf67",
}
self.git_checksums = {
type: bytes.fromhex(cksum)
for type, cksum in self.git_hex_checksums.items()
git_checksums = {
type: bytes.fromhex(cksum) for type, cksum in git_hex_checksums.items()
}
return HashTestData
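# MultiHash computes several checksums over the same input; the tests below
# feed it bytes (from_data), a file object (from_file) and a file path
# (from_path) and compare digest(), hexdigest() and bytehexdigest() against
# the precomputed values above.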
class MultiHashTest(BaseHashutil):
def test_multi_hash_data(self):
checksums = MultiHash.from_data(self.data).digest()
self.assertEqual(checksums, self.checksums)
self.assertFalse('length' in checksums)
def test_multi_hash_data_with_length(self):
expected_checksums = self.checksums.copy()
expected_checksums['length'] = len(self.data)
def test_multi_hash_data(hash_test_data):
checksums = MultiHash.from_data(hash_test_data.data).digest()
assert checksums == hash_test_data.checksums
assert "length" not in checksums
algos = set(['length']).union(hashutil.DEFAULT_ALGORITHMS)
checksums = MultiHash.from_data(self.data, hash_names=algos).digest()
self.assertEqual(checksums, expected_checksums)
self.assertTrue('length' in checksums)
def test_multi_hash_data_with_length(hash_test_data):
expected_checksums = hash_test_data.checksums.copy()
expected_checksums["length"] = len(hash_test_data.data)
def test_multi_hash_data_unknown_hash(self):
with self.assertRaises(ValueError) as cm:
MultiHash.from_data(self.data, ['unknown-hash'])
algos = set(["length"]).union(hashutil.DEFAULT_ALGORITHMS)
checksums = MultiHash.from_data(hash_test_data.data, hash_names=algos).digest()
self.assertIn('Unexpected hashing algorithm', cm.exception.args[0])
self.assertIn('unknown-hash', cm.exception.args[0])
assert checksums == expected_checksums
assert "length" in checksums
def test_multi_hash_file(self):
fobj = io.BytesIO(self.data)
checksums = MultiHash.from_file(fobj, length=len(self.data)).digest()
self.assertEqual(checksums, self.checksums)
def test_multi_hash_data_unknown_hash(hash_test_data):
with pytest.raises(ValueError, match="Unexpected hashing algorithm.*unknown-hash"):
MultiHash.from_data(hash_test_data.data, ["unknown-hash"])
def test_multi_hash_file_hexdigest(self):
fobj = io.BytesIO(self.data)
length = len(self.data)
checksums = MultiHash.from_file(fobj, length=length).hexdigest()
self.assertEqual(checksums, self.hex_checksums)
def test_multi_hash_file_bytehexdigest(self):
fobj = io.BytesIO(self.data)
length = len(self.data)
checksums = MultiHash.from_file(fobj, length=length).bytehexdigest()
self.assertEqual(checksums, self.bytehex_checksums)
def test_multi_hash_file(hash_test_data):
fobj = io.BytesIO(hash_test_data.data)
def test_multi_hash_file_missing_length(self):
fobj = io.BytesIO(self.data)
with self.assertRaises(ValueError) as cm:
MultiHash.from_file(fobj, hash_names=['sha1_git'])
checksums = MultiHash.from_file(fobj, length=len(hash_test_data.data)).digest()
assert checksums == hash_test_data.checksums
self.assertIn('Missing length', cm.exception.args[0])
def test_multi_hash_path(self):
with tempfile.NamedTemporaryFile(delete=False) as f:
f.write(self.data)
def test_multi_hash_file_hexdigest(hash_test_data):
fobj = io.BytesIO(hash_test_data.data)
length = len(hash_test_data.data)
checksums = MultiHash.from_file(fobj, length=length).hexdigest()
assert checksums == hash_test_data.hex_checksums
hashes = MultiHash.from_path(f.name).digest()
os.remove(f.name)
self.assertEqual(self.checksums, hashes)
def test_multi_hash_file_bytehexdigest(hash_test_data):
fobj = io.BytesIO(hash_test_data.data)
length = len(hash_test_data.data)
checksums = MultiHash.from_file(fobj, length=length).bytehexdigest()
assert checksums == hash_test_data.bytehex_checksums
class Hashutil(BaseHashutil):
EXTRA_HASH_ALGOS = ["md5", "sha512"]
def test_hash_git_data(self):
checksums = {
git_type: hashutil.hash_git_data(self.data, git_type)
for git_type in self.git_checksums
}
self.assertEqual(checksums, self.git_checksums)
def test_hash_git_data_unknown_git_type(self):
with self.assertRaises(ValueError) as cm:
hashutil.hash_git_data(self.data, 'unknown-git-type')
self.assertIn('Unexpected git object type', cm.exception.args[0])
self.assertIn('unknown-git-type', cm.exception.args[0])
def test_hash_to_hex(self):
for type in self.checksums:
hex = self.hex_checksums[type]
hash = self.checksums[type]
self.assertEqual(hashutil.hash_to_hex(hex), hex)
self.assertEqual(hashutil.hash_to_hex(hash), hex)
def test_hash_to_bytes(self):
for type in self.checksums:
hex = self.hex_checksums[type]
hash = self.checksums[type]
self.assertEqual(hashutil.hash_to_bytes(hex), hash)
self.assertEqual(hashutil.hash_to_bytes(hash), hash)
def test_hash_to_bytehex(self):
for algo in self.checksums:
self.assertEqual(self.hex_checksums[algo].encode('ascii'),
hashutil.hash_to_bytehex(self.checksums[algo]))
def test_bytehex_to_hash(self):
for algo in self.checksums:
self.assertEqual(self.checksums[algo],
hashutil.bytehex_to_hash(
self.hex_checksums[algo].encode()))
def test_new_hash_unsupported_hashing_algorithm(self):
try:
hashutil._new_hash('blake2:10')
except ValueError as e:
self.assertEqual(str(e),
'Unexpected hashing algorithm blake2:10, '
'expected one of blake2b512, blake2s256, '
'sha1, sha1_git, sha256')
@pytest.mark.parametrize("hash_algo", EXTRA_HASH_ALGOS)
def test_multi_hash_file_with_extra_hash_algo(hash_test_data, hash_algo):
fobj = io.BytesIO(hash_test_data.data)
checksums = MultiHash.from_file(
fobj,
hash_names=DEFAULT_ALGORITHMS | {hash_algo},
length=len(hash_test_data.data),
).digest()
checksum = {hash_algo: hashlib.new(hash_algo, hash_test_data.data).digest()}
assert checksums == {**hash_test_data.checksums, **checksum}
@pytest.mark.parametrize("hash_algo", EXTRA_HASH_ALGOS)
def test_multi_hash_file_hexdigest_with_extra_hash_algo(hash_test_data, hash_algo):
fobj = io.BytesIO(hash_test_data.data)
length = len(hash_test_data.data)
checksums = MultiHash.from_file(
fobj, hash_names=DEFAULT_ALGORITHMS | {hash_algo}, length=length
).hexdigest()
checksum = {hash_algo: hashlib.new(hash_algo, hash_test_data.data).hexdigest()}
assert checksums == {**hash_test_data.hex_checksums, **checksum}
@pytest.mark.parametrize("hash_algo", EXTRA_HASH_ALGOS)
def test_multi_hash_file_bytehexdigest_with_extra_algo(hash_test_data, hash_algo):
fobj = io.BytesIO(hash_test_data.data)
length = len(hash_test_data.data)
checksums = MultiHash.from_file(
fobj, hash_names=DEFAULT_ALGORITHMS | {hash_algo}, length=length
).bytehexdigest()
checksum = {
hash_algo: hash_to_bytehex(hashlib.new(hash_algo, hash_test_data.data).digest())
}
assert checksums == {**hash_test_data.bytehex_checksums, **checksum}
def test_multi_hash_file_missing_length(hash_test_data):
fobj = io.BytesIO(hash_test_data.data)
with pytest.raises(ValueError, match="Missing length"):
MultiHash.from_file(fobj, hash_names=["sha1_git"])
def test_multi_hash_path(hash_test_data):
with tempfile.NamedTemporaryFile(delete=False) as f:
f.write(hash_test_data.data)
hashes = MultiHash.from_path(f.name).digest()
os.remove(f.name)
assert hash_test_data.checksums == hashes
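# hash_git_data() computes git-style object identifiers (the data is hashed
# together with a "<type> <length>\0" header), which is why the "blob"
# checksum in the fixture equals the sha1_git of the same bytes.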
def test_hash_git_data(hash_test_data):
checksums = {
git_type: hashutil.hash_git_data(hash_test_data.data, git_type)
for git_type in hash_test_data.git_checksums
}
assert checksums == hash_test_data.git_checksums
def test_hash_git_data_unknown_git_type(hash_test_data):
with pytest.raises(
ValueError, match="Unexpected git object type.*unknown-git-type"
):
hashutil.hash_git_data(hash_test_data.data, "unknown-git-type")
@patch('hashlib.new')
def test_new_hash_blake2b_blake2b512_builtin(self, mock_hashlib_new):
if 'blake2b512' not in hashlib.algorithms_available:
self.skipTest('blake2b512 not built-in')
mock_hashlib_new.return_value = sentinel = object()
h = hashutil._new_hash('blake2b512')
self.assertIs(h, sentinel)
mock_hashlib_new.assert_called_with('blake2b512')
@patch('hashlib.new')
def test_new_hash_blake2s_blake2s256_builtin(self, mock_hashlib_new):
if 'blake2s256' not in hashlib.algorithms_available:
self.skipTest('blake2s256 not built-in')
mock_hashlib_new.return_value = sentinel = object()
h = hashutil._new_hash('blake2s256')
self.assertIs(h, sentinel)
mock_hashlib_new.assert_called_with('blake2s256')
def test_new_hash_blake2b_builtin(self):
removed_hash = False
try:
if 'blake2b512' in hashlib.algorithms_available:
removed_hash = True
hashlib.algorithms_available.remove('blake2b512')
if 'blake2b' not in hashlib.algorithms_available:
self.skipTest('blake2b not built in')
with patch('hashlib.blake2b') as mock_blake2b:
mock_blake2b.return_value = sentinel = object()
h = hashutil._new_hash('blake2b512')
self.assertIs(h, sentinel)
mock_blake2b.assert_called_with(digest_size=512//8)
finally:
if removed_hash:
hashlib.algorithms_available.add('blake2b512')
def test_new_hash_blake2s_builtin(self):
removed_hash = False
try:
if 'blake2s256' in hashlib.algorithms_available:
removed_hash = True
hashlib.algorithms_available.remove('blake2s256')
if 'blake2s' not in hashlib.algorithms_available:
self.skipTest('blake2s not built in')
with patch('hashlib.blake2s') as mock_blake2s:
mock_blake2s.return_value = sentinel = object()
h = hashutil._new_hash('blake2s256')
self.assertIs(h, sentinel)
mock_blake2s.assert_called_with(digest_size=256//8)
finally:
if removed_hash:
hashlib.algorithms_available.add('blake2s256')
def test_new_hash_blake2b_pyblake2(self):
if 'blake2b512' in hashlib.algorithms_available:
self.skipTest('blake2b512 built in')
if 'blake2b' in hashlib.algorithms_available:
self.skipTest('blake2b built in')
with patch('pyblake2.blake2b') as mock_blake2b:
mock_blake2b.return_value = sentinel = object()
h = hashutil._new_hash('blake2b512')
self.assertIs(h, sentinel)
mock_blake2b.assert_called_with(digest_size=512//8)
def test_new_hash_blake2s_pyblake2(self):
if 'blake2s256' in hashlib.algorithms_available:
self.skipTest('blake2s256 built in')
if 'blake2s' in hashlib.algorithms_available:
self.skipTest('blake2s built in')
def test_hash_to_hex(hash_test_data):
for type in hash_test_data.checksums:
hex = hash_test_data.hex_checksums[type]
hash = hash_test_data.checksums[type]
assert hashutil.hash_to_hex(hex) == hex
assert hashutil.hash_to_hex(hash) == hex
with patch('pyblake2.blake2s') as mock_blake2s:
mock_blake2s.return_value = sentinel = object()
h = hashutil._new_hash('blake2s256')
def test_hash_to_bytes(hash_test_data):
for type in hash_test_data.checksums:
hex = hash_test_data.hex_checksums[type]
hash = hash_test_data.checksums[type]
assert hashutil.hash_to_bytes(hex) == hash
assert hashutil.hash_to_bytes(hash) == hash
self.assertIs(h, sentinel)
mock_blake2s.assert_called_with(digest_size=256//8)
def test_hash_to_bytehex(hash_test_data):
for algo in hash_test_data.checksums:
hex_checksum = hash_test_data.hex_checksums[algo].encode("ascii")
assert hex_checksum == hashutil.hash_to_bytehex(hash_test_data.checksums[algo])
class HashlibGit(unittest.TestCase):
def setUp(self):
self.blob_data = b'42\n'
def test_bytehex_to_hash(hash_test_data):
for algo in hash_test_data.checksums:
assert hash_test_data.checksums[algo] == hashutil.bytehex_to_hash(
hash_test_data.hex_checksums[algo].encode()
)
self.tree_data = b''.join([b'40000 barfoo\0',
bytes.fromhex('c3020f6bf135a38c6df'
'3afeb5fb38232c5e07087'),
b'100644 blah\0',
bytes.fromhex('63756ef0df5e4f10b6efa'
'33cfe5c758749615f20'),
b'100644 hello\0',
bytes.fromhex('907b308167f0880fb2a'
'5c0e1614bb0c7620f9dc3')])
self.commit_data = """tree 1c61f7259dcb770f46b194d941df4f08ff0a3970
def test_new_hash_unsupported_hashing_algorithm():
expected_message = (
"Unexpected hashing algorithm blake2:10, "
"expected one of blake2b512, blake2s256, "
"md5, sha1, sha1_git, sha256"
)
with pytest.raises(ValueError, match=expected_message):
hashutil._new_hash("blake2:10")
def test_new_hash_blake2b_builtin():
with patch_blake2("hashlib.blake2b") as mock_blake2b:
mock_blake2b.return_value = sentinel = object()
h = hashutil._new_hash("blake2b512")
assert h is sentinel
mock_blake2b.assert_called_with(digest_size=512 // 8)
def test_new_hash_blake2s_builtin():
with patch_blake2("hashlib.blake2s") as mock_blake2s:
mock_blake2s.return_value = sentinel = object()
h = hashutil._new_hash("blake2s256")
assert h is sentinel
mock_blake2s.assert_called_with(digest_size=256 // 8)
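# The fixture below carries raw git object payloads (blob, tree, commit, tag)
# along with their expected sha1_git identifiers, used by the hash_git_data()
# tests that follow.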
@pytest.fixture
def hashgit_test_data():
class HashGitTestData:
blob_data = b"42\n"
tree_data = b"".join(
[
b"40000 barfoo\0",
bytes.fromhex("c3020f6bf135a38c6df" "3afeb5fb38232c5e07087"),
b"100644 blah\0",
bytes.fromhex("63756ef0df5e4f10b6efa" "33cfe5c758749615f20"),
b"100644 hello\0",
bytes.fromhex("907b308167f0880fb2a" "5c0e1614bb0c7620f9dc3"),
]
)
commit_data = b"""\
tree 1c61f7259dcb770f46b194d941df4f08ff0a3970
author Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444054085 +0200
committer Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444054085 +0200
initial
""".encode('utf-8') # NOQA
self.tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241
""" # noqa
tag_data = """object 24d012aaec0bc5a4d2f62c56399053d6cc72a241
type commit
tag 0.0.1
tagger Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com> 1444225145 +0200
blah
""".encode('utf-8') # NOQA
self.checksums = {
'blob_sha1_git': bytes.fromhex('d81cc0710eb6cf9efd5b920a8453e1'
'e07157b6cd'),
'tree_sha1_git': bytes.fromhex('ac212302c45eada382b27bfda795db'
'121dacdb1c'),
'commit_sha1_git': bytes.fromhex('e960570b2e6e2798fa4cfb9af2c399'
'd629189653'),
'tag_sha1_git': bytes.fromhex('bc2b99ba469987bcf1272c189ed534'
'e9e959f120'),
""".encode(
"utf-8"
) # NOQA
checksums = {
"blob_sha1_git": bytes.fromhex(
"d81cc0710eb6cf9efd5b920a8453e1" "e07157b6cd"
),
"tree_sha1_git": bytes.fromhex(
"ac212302c45eada382b27bfda795db" "121dacdb1c"
),
"commit_sha1_git": bytes.fromhex(
"e960570b2e6e2798fa4cfb9af2c399" "d629189653"
),
"tag_sha1_git": bytes.fromhex(
"bc2b99ba469987bcf1272c189ed534" "e9e959f120"
),
}
def test_unknown_header_type(self):
with self.assertRaises(ValueError) as cm:
hashutil.hash_git_data(b'any-data', 'some-unknown-type')
return HashGitTestData
def test_unknown_header_type():
with pytest.raises(ValueError, match="Unexpected git object type"):
hashutil.hash_git_data(b"any-data", "some-unknown-type")
def test_hashdata_content(hashgit_test_data):
# when
actual_hash = hashutil.hash_git_data(hashgit_test_data.blob_data, git_type="blob")
# then
assert actual_hash == hashgit_test_data.checksums["blob_sha1_git"]
self.assertIn('Unexpected git object type', cm.exception.args[0])
def test_hashdata_content(self):
# when
actual_hash = hashutil.hash_git_data(self.blob_data, git_type='blob')
def test_hashdata_tree(hashgit_test_data):
# when
actual_hash = hashutil.hash_git_data(hashgit_test_data.tree_data, git_type="tree")
# then
self.assertEqual(actual_hash,
self.checksums['blob_sha1_git'])
# then
assert actual_hash == hashgit_test_data.checksums["tree_sha1_git"]
def test_hashdata_tree(self):
# when
actual_hash = hashutil.hash_git_data(self.tree_data, git_type='tree')
# then
self.assertEqual(actual_hash,
self.checksums['tree_sha1_git'])
def test_hashdata_revision(hashgit_test_data):
# when
actual_hash = hashutil.hash_git_data(
hashgit_test_data.commit_data, git_type="commit"
)
def test_hashdata_revision(self):
# when
actual_hash = hashutil.hash_git_data(self.commit_data,
git_type='commit')
# then
assert actual_hash == hashgit_test_data.checksums["commit_sha1_git"]
# then
self.assertEqual(actual_hash,
self.checksums['commit_sha1_git'])
def test_hashdata_tag(self):
# when
actual_hash = hashutil.hash_git_data(self.tag_data, git_type='tag')
def test_hashdata_tag(hashgit_test_data):
# when
actual_hash = hashutil.hash_git_data(hashgit_test_data.tag_data, git_type="tag")
# then
self.assertEqual(actual_hash,
self.checksums['tag_sha1_git'])
# then
assert actual_hash == hashgit_test_data.checksums["tag_sha1_git"]
# Copyright (C) 2019 The Software Heritage developers
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import attr
from hypothesis import given
from hypothesis import given, settings
import iso8601
from swh.model.hashutil import DEFAULT_ALGORITHMS
from swh.model.hypothesis_strategies import objects, object_dicts
from swh.model.hypothesis_strategies import (
aware_datetimes,
contents,
object_dicts,
objects,
origin_visits,
persons,
skipped_contents,
snapshots,
)
from swh.model.model import ModelObjectType, SnapshotTargetType
target_types = (
'content', 'directory', 'revision', 'release', 'snapshot', 'alias')
target_types = ("content", "directory", "revision", "release", "snapshot", "alias")
all_but_skipped_content = {
o_t for o_t in ModelObjectType if o_t != ModelObjectType.SKIPPED_CONTENT
}
@given(objects())
@given(objects(blacklist_types=()))
def test_generation(obj_type_and_obj):
(obj_type, object_) = obj_type_and_obj
attr.validate(object_)
@given(objects(split_content=False))
def test_generation_merged_content(obj_type_and_obj):
# we should never generate a "skipped_content" here
assert obj_type_and_obj[0] != ModelObjectType.SKIPPED_CONTENT
@given(objects(split_content=True, blacklist_types=all_but_skipped_content))
def test_generation_split_content(obj_type_and_obj):
# we should only generate "skipped_content"
assert obj_type_and_obj[0] == ModelObjectType.SKIPPED_CONTENT
@given(
objects(
blacklist_types={
ModelObjectType.DIRECTORY,
ModelObjectType.ORIGIN_VISIT,
}
)
)
def test_generation_blacklist(obj_type_and_obj):
assert obj_type_and_obj[0] not in {
ModelObjectType.DIRECTORY,
ModelObjectType.ORIGIN_VISIT,
}
def assert_nested_dict(obj):
"""Tests the object is a nested dict and contains no more class
from swh.model.model."""
if isinstance(obj, dict):
for (key, value) in obj.items():
for key, value in obj.items():
assert isinstance(key, (str, bytes)), key
assert_nested_dict(value)
elif isinstance(obj, list):
elif isinstance(obj, tuple):
for value in obj:
assert_nested_dict(value)
elif isinstance(obj, (int, float, str, bytes, bool, type(None))):
elif isinstance(obj, (int, float, str, bytes, bool, type(None), datetime.datetime)):
pass
else:
assert False, obj
@given(object_dicts())
@given(object_dicts(blacklist_types=()))
def test_dicts_generation(obj_type_and_obj):
(obj_type, object_) = obj_type_and_obj
assert_nested_dict(object_)
if obj_type == 'content':
if object_['status'] == 'visible':
assert set(object_) == \
set(DEFAULT_ALGORITHMS) | {'length', 'status', 'data'}
elif object_['status'] == 'absent':
assert set(object_) == \
set(DEFAULT_ALGORITHMS) | {'length', 'status', 'reason'}
elif object_['status'] == 'hidden':
assert set(object_) == \
set(DEFAULT_ALGORITHMS) | {'length', 'status', 'data'}
if obj_type == ModelObjectType.CONTENT:
COMMON_KEYS = set(DEFAULT_ALGORITHMS) | {"length", "status", "ctime"}
if object_["status"] == "visible":
assert set(object_) <= COMMON_KEYS | {"data"}
elif object_["status"] == "absent":
assert set(object_) == COMMON_KEYS | {"reason"}
elif object_["status"] == "hidden":
assert set(object_) <= COMMON_KEYS | {"data"}
else:
assert False, object_
elif obj_type == 'release':
assert object_['target_type'] in target_types
elif obj_type == 'snapshot':
for branch in object_['branches'].values():
assert branch['target_type'] in target_types
elif obj_type == ModelObjectType.RELEASE:
assert object_["target_type"] in target_types
elif obj_type == ModelObjectType.SNAPSHOT:
for branch in object_["branches"].values():
assert branch is None or branch["target_type"] in target_types
@given(aware_datetimes())
def test_datetimes(dt):
# Checks this doesn't raise an error, eg. about seconds in the TZ offset
iso8601.parse_date(dt.isoformat())
assert dt.tzinfo is not None
@given(object_dicts(split_content=False))
def test_dicts_generation_merged_content(obj_type_and_obj):
# we should never generate a "skipped_content" here
assert obj_type_and_obj[0] != ModelObjectType.SKIPPED_CONTENT
@given(object_dicts(split_content=True, blacklist_types=all_but_skipped_content))
def test_dicts_generation_split_content(obj_type_and_obj):
# we should only generate "skipped_content"
assert obj_type_and_obj[0] == ModelObjectType.SKIPPED_CONTENT
@given(
object_dicts(
blacklist_types={
ModelObjectType.CONTENT,
ModelObjectType.RELEASE,
}
)
)
def test_dicts_generation_blacklist(obj_type_and_obj):
assert obj_type_and_obj[0] not in {
ModelObjectType.CONTENT,
ModelObjectType.RELEASE,
}
@given(objects())
def test_model_to_dicts(obj_type_and_obj):
_, object_ = obj_type_and_obj
object_type = object_.object_type
obj_dict = object_.to_dict()
assert_nested_dict(obj_dict)
if object_type in {ModelObjectType.CONTENT, ModelObjectType.SKIPPED_CONTENT}:
COMMON_KEYS = set(DEFAULT_ALGORITHMS) | {"length", "status"}
if object_.ctime is not None:
COMMON_KEYS |= {"ctime"}
if obj_dict["status"] == "visible":
assert set(obj_dict) == COMMON_KEYS | {"data"}
elif obj_dict["status"] == "absent":
assert set(obj_dict) == COMMON_KEYS | {"reason"}
elif obj_dict["status"] == "hidden":
assert set(obj_dict) == COMMON_KEYS | {"data"}
else:
assert False, obj_dict
elif object_type == ModelObjectType.RELEASE:
assert obj_dict["target_type"] in target_types
elif object_type == ModelObjectType.SNAPSHOT:
for branch in obj_dict["branches"].values():
assert branch is None or branch["target_type"] in target_types
@given(contents())
def test_content_aware_datetime(cont):
assert cont.ctime is None or cont.ctime.tzinfo is not None
@given(skipped_contents())
def test_skipped_content_aware_datetime(cont):
assert cont.ctime is None or cont.ctime.tzinfo is not None
_min_snp_size = 10
_max_snp_size = 100
@given(snapshots(min_size=_min_snp_size, max_size=_max_snp_size))
@settings(max_examples=1)
def test_snapshots_strategy(snapshot):
branches = snapshot.branches
assert len(branches) >= _min_snp_size
assert len(branches) <= _max_snp_size
aliases = []
# check snapshot integrity
for name, branch in branches.items():
assert branch is None or branch.target_type.value in target_types
if branch is not None and branch.target_type == SnapshotTargetType.ALIAS:
aliases.append(name)
assert branch.target in branches
# check no cycles between aliases
for alias in aliases:
processed_alias = set()
current_alias = alias
while (
branches[current_alias] is not None
and branches[current_alias].target_type == SnapshotTargetType.ALIAS
):
assert branches[current_alias].target not in processed_alias
processed_alias.add(current_alias)
current_alias = branches[current_alias].target
@given(snapshots(min_size=_min_snp_size, max_size=_min_snp_size))
@settings(max_examples=1)
def test_snapshots_strategy_fixed_size(snapshot):
assert len(snapshot.branches) == _min_snp_size
@given(origin_visits())
def test_origin_visit_aware_datetime(visit):
assert visit.date.tzinfo is not None
@given(persons())
def test_person_do_not_look_like_anonimized(person):
assert not (
len(person.fullname) == 32 and person.name is None and person.email is None
)
# Copyright (C) 2015-2018 The Software Heritage developers
# Copyright (C) 2015-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import binascii
import datetime
import hashlib
from typing import Dict
import unittest
from swh.model import hashutil, identifiers
from swh.model.exceptions import ValidationError
from swh.model.identifiers import (CONTENT, DIRECTORY,
PERSISTENT_IDENTIFIER_TYPES, RELEASE,
REVISION, SNAPSHOT, PersistentId)
import pytest
from swh.model import git_objects, hashutil
from swh.model.hashutil import hash_to_bytes as _x
from swh.model.model import (
Content,
Directory,
ExtID,
Origin,
RawExtrinsicMetadata,
Release,
Revision,
Snapshot,
TimestampWithTimezone,
)
class UtilityFunctionsIdentifier(unittest.TestCase):
def setUp(self):
self.str_id = 'c2e41aae41ac17bd4a650770d6ee77f62e52235b'
self.bytes_id = binascii.unhexlify(self.str_id)
self.bad_type_id = object()
def test_identifier_to_bytes(self):
for id in [self.str_id, self.bytes_id]:
self.assertEqual(identifiers.identifier_to_bytes(id),
self.bytes_id)
# wrong length
with self.assertRaises(ValueError) as cm:
identifiers.identifier_to_bytes(id[:-2])
self.assertIn('length', str(cm.exception))
with self.assertRaises(ValueError) as cm:
identifiers.identifier_to_bytes(self.bad_type_id)
self.assertIn('type', str(cm.exception))
def test_identifier_to_str(self):
for id in [self.str_id, self.bytes_id]:
self.assertEqual(identifiers.identifier_to_str(id),
self.str_id)
# wrong length
with self.assertRaises(ValueError) as cm:
identifiers.identifier_to_str(id[:-2])
self.assertIn('length', str(cm.exception))
with self.assertRaises(ValueError) as cm:
identifiers.identifier_to_str(self.bad_type_id)
self.assertIn('type', str(cm.exception))
def remove_id(d: Dict) -> Dict:
"""Returns a (shallow) copy of a dict with the 'id' key removed."""
d = d.copy()
if "id" in d:
del d["id"]
return d
class UtilityFunctionsDateOffset(unittest.TestCase):
def setUp(self):
self.dates = {
b'1448210036': {
'seconds': 1448210036,
'microseconds': 0,
b"1448210036": {
"seconds": 1448210036,
"microseconds": 0,
},
b'1448210036.002342': {
'seconds': 1448210036,
'microseconds': 2342,
b"1448210036.002342": {
"seconds": 1448210036,
"microseconds": 2342,
},
b"1448210036.12": {
"seconds": 1448210036,
"microseconds": 120000,
},
b'1448210036.12': {
'seconds': 1448210036,
'microseconds': 120000,
}
}
self.broken_dates = [
1448210036.12,
]
self.offsets = {
0: b'+0000',
-630: b'-1030',
800: b'+1320',
}
def test_format_date(self):
for date_repr, date in self.dates.items():
self.assertEqual(identifiers.format_date(date), date_repr)
self.assertEqual(git_objects.format_date(date), date_repr)
def test_format_date_fail(self):
for date in self.broken_dates:
with self.assertRaises(ValueError):
identifiers.format_date(date)
def test_format_offset(self):
for offset, res in self.offsets.items():
self.assertEqual(identifiers.format_offset(offset), res)
content_example = {
"status": "visible",
"length": 5,
"data": b"1984\n",
"ctime": datetime.datetime(2015, 11, 22, 16, 33, 56, tzinfo=datetime.timezone.utc),
}
class ContentIdentifier(unittest.TestCase):
def setUp(self):
self.content = {
'status': 'visible',
'length': 5,
'data': b'1984\n',
'ctime': datetime.datetime(2015, 11, 22, 16, 33, 56,
tzinfo=datetime.timezone.utc),
}
self.content_id = hashutil.MultiHash.from_data(
self.content['data']).digest()
self.content_id = hashutil.MultiHash.from_data(content_example["data"]).digest()
def test_content_identifier(self):
self.assertEqual(identifiers.content_identifier(self.content),
self.content_id)
self.assertEqual(
Content.from_data(content_example["data"]).hashes(), self.content_id
)
directory_example = {
"id": _x("d7ed3d2c31d608823be58b1cbe57605310615231"),
"entries": [
{
"type": "file",
"perms": 33188,
"name": b"README",
"target": _x("37ec8ea2110c0b7a32fbb0e872f6e7debbf95e21"),
},
{
"type": "file",
"perms": 33188,
"name": b"Rakefile",
"target": _x("3bb0e8592a41ae3185ee32266c860714980dbed7"),
},
{
"type": "dir",
"perms": 16384,
"name": b"app",
"target": _x("61e6e867f5d7ba3b40540869bc050b0c4fed9e95"),
},
{
"type": "file",
"perms": 33188,
"name": b"1.megabyte",
"target": _x("7c2b2fbdd57d6765cdc9d84c2d7d333f11be7fb3"),
},
{
"type": "dir",
"perms": 16384,
"name": b"config",
"target": _x("591dfe784a2e9ccc63aaba1cb68a765734310d98"),
},
{
"type": "dir",
"perms": 16384,
"name": b"public",
"target": _x("9588bf4522c2b4648bfd1c61d175d1f88c1ad4a5"),
},
{
"type": "file",
"perms": 33188,
"name": b"development.sqlite3",
"target": _x("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"),
},
{
"type": "dir",
"perms": 16384,
"name": b"doc",
"target": _x("154705c6aa1c8ead8c99c7915373e3c44012057f"),
},
{
"type": "dir",
"perms": 16384,
"name": b"db",
"target": _x("85f157bdc39356b7bc7de9d0099b4ced8b3b382c"),
},
{
"type": "dir",
"perms": 16384,
"name": b"log",
"target": _x("5e3d3941c51cce73352dff89c805a304ba96fffe"),
},
{
"type": "dir",
"perms": 16384,
"name": b"script",
"target": _x("1b278423caf176da3f3533592012502aa10f566c"),
},
{
"type": "dir",
"perms": 16384,
"name": b"test",
"target": _x("035f0437c080bfd8711670b3e8677e686c69c763"),
},
{
"type": "dir",
"perms": 16384,
"name": b"vendor",
"target": _x("7c0dc9ad978c1af3f9a4ce061e50f5918bd27138"),
},
{
"type": "rev",
"perms": 57344,
"name": b"will_paginate",
"target": _x("3d531e169db92a16a9a8974f0ae6edf52e52659e"),
},
# in git order, the dir named "order" should be between the files
# named "order." and "order0"
{
"type": "dir",
"perms": 16384,
"name": b"order",
"target": _x("62cdb7020ff920e5aa642c3d4066950dd1f01f4d"),
},
{
"type": "file",
"perms": 16384,
"name": b"order.",
"target": _x("0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"),
},
{
"type": "file",
"perms": 16384,
"name": b"order0",
"target": _x("bbe960a25ea311d21d40669e93df2003ba9b90a2"),
},
],
}
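The last three entries above exercise git's tree-entry ordering: entries are sorted bytewise by name, except that a directory compares as if its name ended with "/". Because "." (0x2e) sorts before "/" (0x2f), which sorts before "0" (0x30), the directory b"order" ends up between the files b"order." and b"order0". The following is a minimal illustrative sketch of that rule; the helper name is hypothetical and not taken from the swh.model implementation:

def git_tree_sort_key(entry):
    # a directory compares as if its name ended with a slash
    name = entry["name"]
    return name + b"/" if entry["type"] == "dir" else name

_entries = [
    {"type": "file", "name": b"order."},
    {"type": "dir", "name": b"order"},
    {"type": "file", "name": b"order0"},
]
assert [e["name"] for e in sorted(_entries, key=git_tree_sort_key)] == [
    b"order.",
    b"order",
    b"order0",
]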
class DirectoryIdentifier(unittest.TestCase):
def setUp(self):
self.directory = {
'id': 'c2e41aae41ac17bd4a650770d6ee77f62e52235b',
'entries': [
{
'type': 'file',
'perms': 33188,
'name': b'README',
'target': '37ec8ea2110c0b7a32fbb0e872f6e7debbf95e21'
},
{
'type': 'file',
'perms': 33188,
'name': b'Rakefile',
'target': '3bb0e8592a41ae3185ee32266c860714980dbed7'
},
{
'type': 'dir',
'perms': 16384,
'name': b'app',
'target': '61e6e867f5d7ba3b40540869bc050b0c4fed9e95'
},
{
'type': 'file',
'perms': 33188,
'name': b'1.megabyte',
'target': '7c2b2fbdd57d6765cdc9d84c2d7d333f11be7fb3'
},
{
'type': 'dir',
'perms': 16384,
'name': b'config',
'target': '591dfe784a2e9ccc63aaba1cb68a765734310d98'
},
{
'type': 'dir',
'perms': 16384,
'name': b'public',
'target': '9588bf4522c2b4648bfd1c61d175d1f88c1ad4a5'
},
{
'type': 'file',
'perms': 33188,
'name': b'development.sqlite3',
'target': 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391'
},
{
'type': 'dir',
'perms': 16384,
'name': b'doc',
'target': '154705c6aa1c8ead8c99c7915373e3c44012057f'
},
{
'type': 'dir',
'perms': 16384,
'name': b'db',
'target': '85f157bdc39356b7bc7de9d0099b4ced8b3b382c'
},
{
'type': 'dir',
'perms': 16384,
'name': b'log',
'target': '5e3d3941c51cce73352dff89c805a304ba96fffe'
},
{
'type': 'dir',
'perms': 16384,
'name': b'script',
'target': '1b278423caf176da3f3533592012502aa10f566c'
},
{
'type': 'dir',
'perms': 16384,
'name': b'test',
'target': '035f0437c080bfd8711670b3e8677e686c69c763'
},
{
'type': 'dir',
'perms': 16384,
'name': b'vendor',
'target': '7c0dc9ad978c1af3f9a4ce061e50f5918bd27138'
},
{
'type': 'rev',
'perms': 57344,
'name': b'will_paginate',
'target': '3d531e169db92a16a9a8974f0ae6edf52e52659e'
}
],
}
self.directory = directory_example
self.empty_directory = {
'id': '4b825dc642cb6eb9a060e54bf8d69288fbee4904',
'entries': [],
"id": "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
"entries": [],
}
def test_dir_identifier(self):
self.assertEqual(Directory.from_dict(self.directory).id, self.directory["id"])
self.assertEqual(
Directory.from_dict(remove_id(self.directory)).id,
self.directory["id"],
)
def test_dir_identifier_entry_order(self):
# Reverse order of entries, check the id is still the same.
directory = {"entries": reversed(self.directory["entries"])}
self.assertEqual(
identifiers.directory_identifier(self.directory),
self.directory['id'])
Directory.from_dict(remove_id(directory)).id,
self.directory["id"],
)
def test_dir_identifier_empty_directory(self):
self.assertEqual(
identifiers.directory_identifier(self.empty_directory),
self.empty_directory['id'])
Directory.from_dict(remove_id(self.empty_directory)).id,
_x(self.empty_directory["id"]),
)
class RevisionIdentifier(unittest.TestCase):
def setUp(self):
linus_tz = datetime.timezone(datetime.timedelta(minutes=-420))
revision_example = {
"id": _x("bc0195aad0daa2ad5b0d76cce22b167bc3435590"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {
"name": b"Linus Torvalds",
"email": b"torvalds@linux-foundation.org",
"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",
},
"date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz),
"committer": {
"name": b"Linus Torvalds",
"email": b"torvalds@linux-foundation.org",
"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",
},
"committer_date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz),
"message": b"Linux 4.2-rc2\n",
"type": "git",
"synthetic": False,
}
linus_tz = datetime.timezone(datetime.timedelta(minutes=-420))
gpgsig = b'''\
class RevisionIdentifier(unittest.TestCase):
def setUp(self):
gpgsig = b"""\
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.13 (Darwin)
@@ -241,290 +258,274 @@ jdTswYL6+MUdL8sB9pZ82D+BP/YAdHe69CyTu1lk9RT2pYtI/kkfjHubXBCYEJSG
lf1Qb5GDsQrZWgD+jtWTywOYHtCBwyCKSAXxSARMbNPeak9WPlcW/Jmu+fUcMe2x
dg1KdHOa34shrKDaOVzW
=od6m
-----END PGP SIGNATURE-----'''
self.revision = {
'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590',
'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07',
'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'],
'author': {
'name': b'Linus Torvalds',
'email': b'torvalds@linux-foundation.org',
},
'date': datetime.datetime(2015, 7, 12, 15, 10, 30,
tzinfo=linus_tz),
'committer': {
'name': b'Linus Torvalds',
'email': b'torvalds@linux-foundation.org',
},
'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30,
tzinfo=linus_tz),
'message': b'Linux 4.2-rc2\n',
}
-----END PGP SIGNATURE-----"""
self.revision = revision_example
self.revision_none_metadata = {
'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590',
'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07',
'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'],
'author': {
'name': b'Linus Torvalds',
'email': b'torvalds@linux-foundation.org',
"id": _x("bc0195aad0daa2ad5b0d76cce22b167bc3435590"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {
"name": b"Linus Torvalds",
"email": b"torvalds@linux-foundation.org",
},
'date': datetime.datetime(2015, 7, 12, 15, 10, 30,
tzinfo=linus_tz),
'committer': {
'name': b'Linus Torvalds',
'email': b'torvalds@linux-foundation.org',
"date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz),
"committer": {
"name": b"Linus Torvalds",
"email": b"torvalds@linux-foundation.org",
},
'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30,
tzinfo=linus_tz),
'message': b'Linux 4.2-rc2\n',
'metadata': None,
"committer_date": datetime.datetime(
2015, 7, 12, 15, 10, 30, tzinfo=linus_tz
),
"message": b"Linux 4.2-rc2\n",
"type": "git",
"synthetic": False,
"metadata": None,
}
self.synthetic_revision = {
'id': b'\xb2\xa7\xe1&\x04\x92\xe3D\xfa\xb3\xcb\xf9\x1b\xc1<\x91'
b'\xe0T&\xfd',
'author': {
'name': b'Software Heritage',
'email': b'robot@softwareheritage.org',
"id": _x("b2a7e1260492e344fab3cbf91bc13c91e05426fd"),
"author": {
"name": b"Software Heritage",
"email": b"robot@softwareheritage.org",
},
'date': {
'timestamp': {'seconds': 1437047495},
'offset': 0,
'negative_utc': False,
"date": {
"timestamp": {"seconds": 1437047495},
"offset_bytes": b"+0000",
},
'type': 'tar',
'committer': {
'name': b'Software Heritage',
'email': b'robot@softwareheritage.org',
"type": "tar",
"committer": {
"name": b"Software Heritage",
"email": b"robot@softwareheritage.org",
},
"committer_date": 1437047495,
"synthetic": True,
"parents": [],
"message": b"synthetic revision message\n",
"directory": _x("d11f00a6a0fea6055341d25584b5a96516c0d2b8"),
"metadata": {
"original_artifact": [
{
"archive_type": "tar",
"name": "gcc-5.2.0.tar.bz2",
"sha1_git": "39d281aff934d44b439730057e55b055e206a586",
"sha1": "fe3f5390949d47054b613edc36c557eb1d51c18e",
"sha256": "5f835b04b5f7dd4f4d2dc96190ec1621b8d89f"
"2dc6f638f9f8bc1b1014ba8cad",
}
]
},
'committer_date': 1437047495,
'synthetic': True,
'parents': [None],
'message': b'synthetic revision message\n',
'directory': b'\xd1\x1f\x00\xa6\xa0\xfe\xa6\x05SA\xd2U\x84\xb5\xa9'
b'e\x16\xc0\xd2\xb8',
'metadata': {'original_artifact': [
{'archive_type': 'tar',
'name': 'gcc-5.2.0.tar.bz2',
'sha1_git': '39d281aff934d44b439730057e55b055e206a586',
'sha1': 'fe3f5390949d47054b613edc36c557eb1d51c18e',
'sha256': '5f835b04b5f7dd4f4d2dc96190ec1621b8d89f'
'2dc6f638f9f8bc1b1014ba8cad'}]},
}
# cat commit.txt | git hash-object -t commit --stdin
self.revision_with_extra_headers = {
'id': '010d34f384fa99d047cdd5e2f41e56e5c2feee45',
'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07',
'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'],
'author': {
'name': b'Linus Torvalds',
'email': b'torvalds@linux-foundation.org',
'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>',
"id": _x("010d34f384fa99d047cdd5e2f41e56e5c2feee45"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {
"name": b"Linus Torvalds",
"email": b"torvalds@linux-foundation.org",
"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",
},
'date': datetime.datetime(2015, 7, 12, 15, 10, 30,
tzinfo=linus_tz),
'committer': {
'name': b'Linus Torvalds',
'email': b'torvalds@linux-foundation.org',
'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>',
"date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz),
"committer": {
"name": b"Linus Torvalds",
"email": b"torvalds@linux-foundation.org",
"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",
},
'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30,
tzinfo=linus_tz),
'message': b'Linux 4.2-rc2\n',
'metadata': {
'extra_headers': [
['svn-repo-uuid', '046f1af7-66c2-d61b-5410-ce57b7db7bff'],
['svn-revision', 10],
]
}
"committer_date": datetime.datetime(
2015, 7, 12, 15, 10, 30, tzinfo=linus_tz
),
"message": b"Linux 4.2-rc2\n",
"type": "git",
"synthetic": False,
"extra_headers": (
(b"svn-repo-uuid", b"046f1af7-66c2-d61b-5410-ce57b7db7bff"),
(b"svn-revision", b"10"),
),
}
self.revision_with_gpgsig = {
'id': '44cc742a8ca17b9c279be4cc195a93a6ef7a320e',
'directory': 'b134f9b7dc434f593c0bab696345548b37de0558',
'parents': ['689664ae944b4692724f13b709a4e4de28b54e57',
'c888305e1efbaa252d01b4e5e6b778f865a97514'],
'author': {
'name': b'Jiang Xin',
'email': b'worldhello.net@gmail.com',
'fullname': b'Jiang Xin <worldhello.net@gmail.com>',
},
'date': {
'timestamp': 1428538899,
'offset': 480,
"id": _x("44cc742a8ca17b9c279be4cc195a93a6ef7a320e"),
"directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
"parents": [
_x("689664ae944b4692724f13b709a4e4de28b54e57"),
_x("c888305e1efbaa252d01b4e5e6b778f865a97514"),
],
"author": {
"name": b"Jiang Xin",
"email": b"worldhello.net@gmail.com",
"fullname": b"Jiang Xin <worldhello.net@gmail.com>",
},
'committer': {
'name': b'Jiang Xin',
'email': b'worldhello.net@gmail.com',
"date": {
"timestamp": 1428538899,
"offset": 480,
},
'committer_date': {
'timestamp': 1428538899,
'offset': 480,
"committer": {
"name": b"Jiang Xin",
"email": b"worldhello.net@gmail.com",
},
'metadata': {
'extra_headers': [
['gpgsig', gpgsig],
],
"committer_date": {
"timestamp": 1428538899,
"offset": 480,
},
'message': b'''Merge branch 'master' of git://github.com/alexhenrie/git-po
"extra_headers": ((b"gpgsig", gpgsig),),
"message": b"""Merge branch 'master' of git://github.com/alexhenrie/git-po
* 'master' of git://github.com/alexhenrie/git-po:
l10n: ca.po: update translation
'''
""",
"type": "git",
"synthetic": False,
}
self.revision_no_message = {
'id': '4cfc623c9238fa92c832beed000ce2d003fd8333',
'directory': 'b134f9b7dc434f593c0bab696345548b37de0558',
'parents': ['689664ae944b4692724f13b709a4e4de28b54e57',
'c888305e1efbaa252d01b4e5e6b778f865a97514'],
'author': {
'name': b'Jiang Xin',
'email': b'worldhello.net@gmail.com',
'fullname': b'Jiang Xin <worldhello.net@gmail.com>',
"id": _x("4cfc623c9238fa92c832beed000ce2d003fd8333"),
"directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
"parents": [
_x("689664ae944b4692724f13b709a4e4de28b54e57"),
_x("c888305e1efbaa252d01b4e5e6b778f865a97514"),
],
"author": {
"name": b"Jiang Xin",
"email": b"worldhello.net@gmail.com",
"fullname": b"Jiang Xin <worldhello.net@gmail.com>",
},
'date': {
'timestamp': 1428538899,
'offset': 480,
"date": {
"timestamp": 1428538899,
"offset": 480,
},
'committer': {
'name': b'Jiang Xin',
'email': b'worldhello.net@gmail.com',
"committer": {
"name": b"Jiang Xin",
"email": b"worldhello.net@gmail.com",
},
'committer_date': {
'timestamp': 1428538899,
'offset': 480,
"committer_date": {
"timestamp": 1428538899,
"offset": 480,
},
'message': None,
"message": None,
"type": "git",
"synthetic": False,
}
self.revision_empty_message = {
'id': '7442cd78bd3b4966921d6a7f7447417b7acb15eb',
'directory': 'b134f9b7dc434f593c0bab696345548b37de0558',
'parents': ['689664ae944b4692724f13b709a4e4de28b54e57',
'c888305e1efbaa252d01b4e5e6b778f865a97514'],
'author': {
'name': b'Jiang Xin',
'email': b'worldhello.net@gmail.com',
'fullname': b'Jiang Xin <worldhello.net@gmail.com>',
"id": _x("7442cd78bd3b4966921d6a7f7447417b7acb15eb"),
"directory": _x("b134f9b7dc434f593c0bab696345548b37de0558"),
"parents": [
_x("689664ae944b4692724f13b709a4e4de28b54e57"),
_x("c888305e1efbaa252d01b4e5e6b778f865a97514"),
],
"author": {
"name": b"Jiang Xin",
"email": b"worldhello.net@gmail.com",
"fullname": b"Jiang Xin <worldhello.net@gmail.com>",
},
'date': {
'timestamp': 1428538899,
'offset': 480,
"date": {
"timestamp": 1428538899,
"offset": 480,
},
'committer': {
'name': b'Jiang Xin',
'email': b'worldhello.net@gmail.com',
"committer": {
"name": b"Jiang Xin",
"email": b"worldhello.net@gmail.com",
},
'committer_date': {
'timestamp': 1428538899,
'offset': 480,
"committer_date": {
"timestamp": 1428538899,
"offset": 480,
},
'message': b'',
"message": b"",
"type": "git",
"synthetic": False,
}
self.revision_only_fullname = {
'id': '010d34f384fa99d047cdd5e2f41e56e5c2feee45',
'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07',
'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'],
'author': {
'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>',
"id": _x("010d34f384fa99d047cdd5e2f41e56e5c2feee45"),
"directory": _x("85a74718d377195e1efd0843ba4f3260bad4fe07"),
"parents": [_x("01e2d0627a9a6edb24c37db45db5ecb31e9de808")],
"author": {
"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",
},
'date': datetime.datetime(2015, 7, 12, 15, 10, 30,
tzinfo=linus_tz),
'committer': {
'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>',
"date": datetime.datetime(2015, 7, 12, 15, 10, 30, tzinfo=linus_tz),
"committer": {
"fullname": b"Linus Torvalds <torvalds@linux-foundation.org>",
},
'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30,
tzinfo=linus_tz),
'message': b'Linux 4.2-rc2\n',
'metadata': {
'extra_headers': [
['svn-repo-uuid', '046f1af7-66c2-d61b-5410-ce57b7db7bff'],
['svn-revision', 10],
]
}
"committer_date": datetime.datetime(
2015, 7, 12, 15, 10, 30, tzinfo=linus_tz
),
"message": b"Linux 4.2-rc2\n",
"type": "git",
"synthetic": False,
"extra_headers": (
(b"svn-repo-uuid", b"046f1af7-66c2-d61b-5410-ce57b7db7bff"),
(b"svn-revision", b"10"),
),
}
def test_revision_identifier(self):
self.assertEqual(
identifiers.revision_identifier(self.revision),
identifiers.identifier_to_str(self.revision['id']),
Revision.from_dict(self.revision).id,
self.revision["id"],
)
self.assertEqual(
Revision.from_dict(remove_id(self.revision)).id,
self.revision["id"],
)
def test_revision_identifier_none_metadata(self):
self.assertEqual(
identifiers.revision_identifier(self.revision_none_metadata),
identifiers.identifier_to_str(self.revision_none_metadata['id']),
Revision.from_dict(remove_id(self.revision_none_metadata)).id,
self.revision_none_metadata["id"],
)
def test_revision_identifier_synthetic(self):
self.assertEqual(
identifiers.revision_identifier(self.synthetic_revision),
identifiers.identifier_to_str(self.synthetic_revision['id']),
Revision.from_dict(remove_id(self.synthetic_revision)).id,
self.synthetic_revision["id"],
)
def test_revision_identifier_with_extra_headers(self):
self.assertEqual(
identifiers.revision_identifier(
self.revision_with_extra_headers),
identifiers.identifier_to_str(
self.revision_with_extra_headers['id']),
Revision.from_dict(remove_id(self.revision_with_extra_headers)).id,
self.revision_with_extra_headers["id"],
)
def test_revision_identifier_with_gpgsig(self):
self.assertEqual(
identifiers.revision_identifier(
self.revision_with_gpgsig),
identifiers.identifier_to_str(
self.revision_with_gpgsig['id']),
Revision.from_dict(remove_id(self.revision_with_gpgsig)).id,
self.revision_with_gpgsig["id"],
)
def test_revision_identifier_no_message(self):
self.assertEqual(
identifiers.revision_identifier(
self.revision_no_message),
identifiers.identifier_to_str(
self.revision_no_message['id']),
Revision.from_dict(remove_id(self.revision_no_message)).id,
self.revision_no_message["id"],
)
def test_revision_identifier_empty_message(self):
self.assertEqual(
identifiers.revision_identifier(
self.revision_empty_message),
identifiers.identifier_to_str(
self.revision_empty_message['id']),
Revision.from_dict(remove_id(self.revision_empty_message)).id,
self.revision_empty_message["id"],
)
def test_revision_identifier_only_fullname(self):
self.assertEqual(
identifiers.revision_identifier(
self.revision_only_fullname),
identifiers.identifier_to_str(
self.revision_only_fullname['id']),
Revision.from_dict(remove_id(self.revision_only_fullname)).id,
self.revision_only_fullname["id"],
)
class ReleaseIdentifier(unittest.TestCase):
def setUp(self):
linus_tz = datetime.timezone(datetime.timedelta(minutes=-420))
self.release = {
'id': '2b10839e32c4c476e9d94492756bb1a3e1ec4aa8',
'target': b't\x1b"R\xa5\xe1Ml`\xa9\x13\xc7z`\x99\xab\xe7:\x85J',
'target_type': 'revision',
'name': b'v2.6.14',
'author': {
'name': b'Linus Torvalds',
'email': b'torvalds@g5.osdl.org',
},
'date': datetime.datetime(2005, 10, 27, 17, 2, 33,
tzinfo=linus_tz),
'message': b'''\
release_example = {
"id": _x("2b10839e32c4c476e9d94492756bb1a3e1ec4aa8"),
"target": _x("741b2252a5e14d6c60a913c77a6099abe73a854a"),
"target_type": "revision",
"name": b"v2.6.14",
"author": {
"name": b"Linus Torvalds",
"email": b"torvalds@g5.osdl.org",
"fullname": b"Linus Torvalds <torvalds@g5.osdl.org>",
},
"date": datetime.datetime(2005, 10, 27, 17, 2, 33, tzinfo=linus_tz),
"message": b"""\
Linux 2.6.14 release
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.1 (GNU/Linux)
@@ -533,16 +534,23 @@ iD8DBQBDYWq6F3YsRnbiHLsRAmaeAJ9RCez0y8rOBbhSv344h86l/VVcugCeIhO1
wdLOnvj91G4wxYqrvThthbE=
=7VeT
-----END PGP SIGNATURE-----
''',
'synthetic': False,
}
""",
"synthetic": False,
}
class ReleaseIdentifier(unittest.TestCase):
def setUp(self):
linus_tz = datetime.timezone(datetime.timedelta(minutes=-420))
self.release = release_example
self.release_no_author = {
'id': b'&y\x1a\x8b\xcf\x0em3\xf4:\xefv\x82\xbd\xb5U#mV\xde',
'target': '9ee1c939d1cb936b1f98e8d81aeffab57bae46ab',
'target_type': 'revision',
'name': b'v2.6.12',
'message': b'''\
"id": _x("26791a8bcf0e6d33f43aef7682bdb555236d56de"),
"target": _x("9ee1c939d1cb936b1f98e8d81aeffab57bae46ab"),
"target_type": "revision",
"name": b"v2.6.12",
"message": b"""\
This is the final 2.6.12 release
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.4 (GNU/Linux)
@@ -551,356 +559,805 @@ iD8DBQBCsykyF3YsRnbiHLsRAvPNAJ482tCZwuxp/bJRz7Q98MHlN83TpACdHr37
o6X/3T+vm8K3bf3driRr34c=
=sBHn
-----END PGP SIGNATURE-----
''',
'synthetic': False,
""",
"synthetic": False,
}
self.release_no_message = {
'id': 'b6f4f446715f7d9543ef54e41b62982f0db40045',
'target': '9ee1c939d1cb936b1f98e8d81aeffab57bae46ab',
'target_type': 'revision',
'name': b'v2.6.12',
'author': {
'name': b'Linus Torvalds',
'email': b'torvalds@g5.osdl.org',
"id": _x("b6f4f446715f7d9543ef54e41b62982f0db40045"),
"target": _x("9ee1c939d1cb936b1f98e8d81aeffab57bae46ab"),
"target_type": "revision",
"name": b"v2.6.12",
"author": {
"name": b"Linus Torvalds",
"email": b"torvalds@g5.osdl.org",
},
'date': datetime.datetime(2005, 10, 27, 17, 2, 33,
tzinfo=linus_tz),
'message': None,
"date": datetime.datetime(2005, 10, 27, 17, 2, 33, tzinfo=linus_tz),
"message": None,
"synthetic": False,
}
self.release_empty_message = {
'id': '71a0aea72444d396575dc25ac37fec87ee3c6492',
'target': '9ee1c939d1cb936b1f98e8d81aeffab57bae46ab',
'target_type': 'revision',
'name': b'v2.6.12',
'author': {
'name': b'Linus Torvalds',
'email': b'torvalds@g5.osdl.org',
"id": _x("71a0aea72444d396575dc25ac37fec87ee3c6492"),
"target": _x("9ee1c939d1cb936b1f98e8d81aeffab57bae46ab"),
"target_type": "revision",
"name": b"v2.6.12",
"author": {
"name": b"Linus Torvalds",
"email": b"torvalds@g5.osdl.org",
},
'date': datetime.datetime(2005, 10, 27, 17, 2, 33,
tzinfo=linus_tz),
'message': b'',
"date": datetime.datetime(2005, 10, 27, 17, 2, 33, tzinfo=linus_tz),
"message": b"",
"synthetic": False,
}
self.release_negative_utc = {
'id': '97c8d2573a001f88e72d75f596cf86b12b82fd01',
'name': b'20081029',
'target': '54e9abca4c77421e2921f5f156c9fe4a9f7441c7',
'target_type': 'revision',
'date': {
'timestamp': {'seconds': 1225281976},
'offset': 0,
'negative_utc': True,
"id": _x("97c8d2573a001f88e72d75f596cf86b12b82fd01"),
"name": b"20081029",
"target": _x("54e9abca4c77421e2921f5f156c9fe4a9f7441c7"),
"target_type": "revision",
"date": {
"timestamp": {"seconds": 1225281976},
"offset_bytes": b"-0000",
},
'author': {
'name': b'Otavio Salvador',
'email': b'otavio@debian.org',
'id': 17640,
"author": {
"name": b"Otavio Salvador",
"email": b"otavio@debian.org",
},
'synthetic': False,
'message': b'tagging version 20081029\n\nr56558\n',
"synthetic": False,
"message": b"tagging version 20081029\n\nr56558\n",
}
self.release_newline_in_author = {
'author': {
'email': b'esycat@gmail.com',
'fullname': b'Eugene Janusov\n<esycat@gmail.com>',
'name': b'Eugene Janusov\n',
"author": {
"email": b"esycat@gmail.com",
"fullname": b"Eugene Janusov\n<esycat@gmail.com>",
"name": b"Eugene Janusov\n",
},
'date': {
'negative_utc': None,
'offset': 600,
'timestamp': {
'microseconds': 0,
'seconds': 1377480558,
"date": {
"offset_bytes": b"+1000",
"timestamp": {
"microseconds": 0,
"seconds": 1377480558,
},
},
'id': b'\\\x98\xf5Y\xd04\x16-\xe2->\xbe\xb9T3\xe6\xf8\x88R1',
'message': b'Release of v0.3.2.',
'name': b'0.3.2',
'synthetic': False,
'target': (b'\xc0j\xa3\xd9;x\xa2\x86\\I5\x17'
b'\x000\xf8\xc2\xd79o\xd3'),
'target_type': 'revision',
"id": _x("5c98f559d034162de22d3ebeb95433e6f8885231"),
"message": b"Release of v0.3.2.",
"name": b"0.3.2",
"synthetic": False,
"target": _x("c06aa3d93b78a2865c4935170030f8c2d7396fd3"),
"target_type": "revision",
}
self.release_snapshot_target = dict(self.release)
self.release_snapshot_target["target_type"] = "snapshot"
self.release_snapshot_target["id"] = _x(
"c29c3ddcc6769a04e54dd69d63a6fdcbc566f850"
)
def test_release_identifier(self):
self.assertEqual(
identifiers.release_identifier(self.release),
identifiers.identifier_to_str(self.release['id'])
Release.from_dict(self.release).id,
self.release["id"],
)
self.assertEqual(
Release.from_dict(remove_id(self.release)).id,
self.release["id"],
)
def test_release_identifier_no_author(self):
self.assertEqual(
identifiers.release_identifier(self.release_no_author),
identifiers.identifier_to_str(self.release_no_author['id'])
Release.from_dict(remove_id(self.release_no_author)).id,
self.release_no_author["id"],
)
def test_release_identifier_no_message(self):
self.assertEqual(
identifiers.release_identifier(self.release_no_message),
identifiers.identifier_to_str(self.release_no_message['id'])
Release.from_dict(remove_id(self.release_no_message)).id,
self.release_no_message["id"],
)
def test_release_identifier_empty_message(self):
self.assertEqual(
identifiers.release_identifier(self.release_empty_message),
identifiers.identifier_to_str(self.release_empty_message['id'])
Release.from_dict(remove_id(self.release_empty_message)).id,
self.release_empty_message["id"],
)
def test_release_identifier_negative_utc(self):
self.assertEqual(
identifiers.release_identifier(self.release_negative_utc),
identifiers.identifier_to_str(self.release_negative_utc['id'])
Release.from_dict(remove_id(self.release_negative_utc)).id,
self.release_negative_utc["id"],
)
def test_release_identifier_newline_in_author(self):
self.assertEqual(
identifiers.release_identifier(self.release_newline_in_author),
identifiers.identifier_to_str(self.release_newline_in_author['id'])
Release.from_dict(remove_id(self.release_newline_in_author)).id,
self.release_newline_in_author["id"],
)
def test_release_identifier_snapshot_target(self):
self.assertEqual(
Release.from_dict(self.release_snapshot_target).id,
self.release_snapshot_target["id"],
)
snapshot_example = {
"id": _x("6e65b86363953b780d92b0a928f3e8fcdd10db36"),
"branches": {
b"directory": {
"target": _x("1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8"),
"target_type": "directory",
},
b"content": {
"target": _x("fe95a46679d128ff167b7c55df5d02356c5a1ae1"),
"target_type": "content",
},
b"alias": {
"target": b"revision",
"target_type": "alias",
},
b"revision": {
"target": _x("aafb16d69fd30ff58afdd69036a26047f3aebdc6"),
"target_type": "revision",
},
b"release": {
"target": _x("7045404f3d1c54e6473c71bbb716529fbad4be24"),
"target_type": "release",
},
b"snapshot": {
"target": _x("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
"target_type": "snapshot",
},
b"dangling": None,
},
}
class SnapshotIdentifier(unittest.TestCase):
def setUp(self):
super().setUp()
self.empty = {
'id': '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e',
'branches': {},
}
self.dangling_branch = {
'id': 'c84502e821eb21ed84e9fd3ec40973abc8b32353',
'branches': {
b'HEAD': None,
},
}
self.empty = Snapshot.from_dict(
{
"id": _x("1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"),
"branches": {},
}
)
self.unresolved = {
'id': '84b4548ea486e4b0a7933fa541ff1503a0afe1e0',
'branches': {
b'foo': {
'target': b'bar',
'target_type': 'alias',
self.dangling_branch = Snapshot.from_dict(
{
"id": _x("c84502e821eb21ed84e9fd3ec40973abc8b32353"),
"branches": {
b"HEAD": None,
},
},
}
}
)
self.all_types = {
'id': '6e65b86363953b780d92b0a928f3e8fcdd10db36',
'branches': {
b'directory': {
'target': '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8',
'target_type': 'directory',
},
b'content': {
'target': 'fe95a46679d128ff167b7c55df5d02356c5a1ae1',
'target_type': 'content',
},
b'alias': {
'target': b'revision',
'target_type': 'alias',
},
b'revision': {
'target': 'aafb16d69fd30ff58afdd69036a26047f3aebdc6',
'target_type': 'revision',
self.unresolved = Snapshot.from_dict(
{
"id": _x("84b4548ea486e4b0a7933fa541ff1503a0afe1e0"),
"branches": {
b"foo": {
"target": b"bar",
"target_type": "alias",
},
},
b'release': {
'target': '7045404f3d1c54e6473c71bbb716529fbad4be24',
'target_type': 'release',
},
b'snapshot': {
'target': '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e',
'target_type': 'snapshot',
},
b'dangling': None,
}
}
)
self.all_types = snapshot_example
def test_empty_snapshot(self):
self.assertEqual(
identifiers.snapshot_identifier(self.empty),
identifiers.identifier_to_str(self.empty['id']),
Snapshot.from_dict(remove_id(self.empty.to_dict())).id,
self.empty.id,
)
def test_dangling_branch(self):
self.assertEqual(
identifiers.snapshot_identifier(self.dangling_branch),
identifiers.identifier_to_str(self.dangling_branch['id']),
Snapshot.from_dict(remove_id(self.dangling_branch.to_dict())).id,
self.dangling_branch.id,
)
def test_unresolved(self):
self.assertEqual(
Snapshot.from_dict(remove_id(self.unresolved.to_dict())).id,
self.unresolved.id,
)
def test_git_object_unresolved(self):
with self.assertRaisesRegex(ValueError, "b'foo' -> b'bar'"):
identifiers.snapshot_identifier(self.unresolved)
git_objects.snapshot_git_object(self.unresolved)
git_objects.snapshot_git_object(self.unresolved, ignore_unresolved=True)
def test_unresolved_force(self):
def test_all_types(self):
self.assertEqual(
identifiers.snapshot_identifier(
self.unresolved,
ignore_unresolved=True,
Snapshot.from_dict(remove_id(self.all_types)).id,
self.all_types["id"],
)
authority_example = {
"type": "forge",
"url": "https://forge.softwareheritage.org/",
}
fetcher_example = {
"name": "swh-phabricator-metadata-fetcher",
"version": "0.0.1",
}
metadata_example = {
"target": "swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d",
"discovery_date": datetime.datetime(
2021, 1, 25, 11, 27, 51, tzinfo=datetime.timezone.utc
),
"authority": authority_example,
"fetcher": fetcher_example,
"format": "json",
"metadata": b'{"foo": "bar"}',
}
class RawExtrinsicMetadataIdentifier(unittest.TestCase):
def setUp(self):
super().setUp()
self.minimal = metadata_example
self.maximal = {
**self.minimal,
"origin": "https://forge.softwareheritage.org/source/swh-model/",
"visit": 42,
"snapshot": "swh:1:snp:" + "00" * 20,
"release": "swh:1:rel:" + "01" * 20,
"revision": "swh:1:rev:" + "02" * 20,
"path": b"/abc/def",
"directory": "swh:1:dir:" + "03" * 20,
}
def test_minimal(self):
git_object = (
b"raw_extrinsic_metadata 210\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
b"discovery_date 1611574071\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
b"\n"
b'{"foo": "bar"}'
)
self.assertEqual(
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(self.minimal)
),
identifiers.identifier_to_str(self.unresolved['id']),
git_object,
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(self.minimal).id,
hashlib.sha1(git_object).digest(),
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(self.minimal).id,
_x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_maximal(self):
git_object = (
b"raw_extrinsic_metadata 533\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
b"discovery_date 1611574071\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
b"origin https://forge.softwareheritage.org/source/swh-model/\n"
b"visit 42\n"
b"snapshot swh:1:snp:0000000000000000000000000000000000000000\n"
b"release swh:1:rel:0101010101010101010101010101010101010101\n"
b"revision swh:1:rev:0202020202020202020202020202020202020202\n"
b"path /abc/def\n"
b"directory swh:1:dir:0303030303030303030303030303030303030303\n"
b"\n"
b'{"foo": "bar"}'
)
def test_all_types(self):
self.assertEqual(
identifiers.snapshot_identifier(self.all_types),
identifiers.identifier_to_str(self.all_types['id']),
)
def test_persistent_identifier(self):
_snapshot_id = hashutil.hash_to_bytes(
'c7c108084bc0bf3d81436bf980b46e98bd338453')
_release_id = '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'
_revision_id = '309cf2674ee7a0749978cf8265ab91a60aea0f7d'
_directory_id = 'd198bc9d7a6bcf6db04f476d29314f157507d505'
_content_id = '94a9ed024d3859793618152ea559a168bbcbb5e2'
_snapshot = {'id': _snapshot_id}
_release = {'id': _release_id}
_revision = {'id': _revision_id}
_directory = {'id': _directory_id}
_content = {'sha1_git': _content_id}
for full_type, _hash, expected_persistent_id, version, _meta in [
(SNAPSHOT, _snapshot_id,
'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453',
None, {}),
(RELEASE, _release_id,
'swh:2:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
2, {}),
(REVISION, _revision_id,
'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d',
None, {}),
(DIRECTORY, _directory_id,
'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505',
None, {}),
(CONTENT, _content_id,
'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2',
1, {}),
(SNAPSHOT, _snapshot,
'swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453',
None, {}),
(RELEASE, _release,
'swh:2:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
2, {}),
(REVISION, _revision,
'swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d',
None, {}),
(DIRECTORY, _directory,
'swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505',
None, {}),
(CONTENT, _content,
'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2',
1, {}),
(CONTENT, _content,
'swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;origin=1',
1, {'origin': '1'}),
]:
if version:
actual_value = identifiers.persistent_identifier(
full_type, _hash, version, metadata=_meta)
else:
actual_value = identifiers.persistent_identifier(
full_type, _hash, metadata=_meta)
self.assertEqual(actual_value, expected_persistent_id)
def test_persistent_identifier_wrong_input(self):
_snapshot_id = 'notahash4bc0bf3d81436bf980b46e98bd338453'
_snapshot = {'id': _snapshot_id}
for _type, _hash, _error in [
(SNAPSHOT, _snapshot_id, 'Unexpected characters'),
(SNAPSHOT, _snapshot, 'Unexpected characters'),
('foo', '', 'Wrong input: Supported types are'),
]:
with self.assertRaisesRegex(ValidationError, _error):
identifiers.persistent_identifier(_type, _hash)
def test_parse_persistent_identifier(self):
for pid, _type, _version, _hash in [
('swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2',
CONTENT, 1, '94a9ed024d3859793618152ea559a168bbcbb5e2'),
('swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505',
DIRECTORY, 1, 'd198bc9d7a6bcf6db04f476d29314f157507d505'),
('swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d',
REVISION, 1, '309cf2674ee7a0749978cf8265ab91a60aea0f7d'),
('swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f',
RELEASE, 1, '22ece559cc7cc2364edc5e5593d63ae8bd229f9f'),
('swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453',
SNAPSHOT, 1, 'c7c108084bc0bf3d81436bf980b46e98bd338453'),
]:
expected_result = PersistentId(
namespace='swh',
scheme_version=_version,
object_type=_type,
object_id=_hash,
metadata={}
)
actual_result = identifiers.parse_persistent_identifier(pid)
self.assertEqual(actual_result, expected_result)
for pid, _type, _version, _hash, _metadata in [
('swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython', # noqa
CONTENT, 1, '9c95815d9e9d91b8dae8e05d8bbc696fe19f796b',
{
'lines': '1-18',
'origin': 'https://github.com/python/cpython'
}),
('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools', # noqa
DIRECTORY, 1, '0b6959356d30f1a4e9b7f6bca59b9a336464c03d',
{
'origin': 'deb://Debian/packages/linuxdoc-tools'
})
]:
expected_result = PersistentId(
namespace='swh',
scheme_version=_version,
object_type=_type,
object_id=_hash,
metadata=_metadata
)
actual_result = identifiers.parse_persistent_identifier(pid)
self.assertEqual(actual_result, expected_result)
def test_parse_persistent_identifier_parsing_error(self):
for pid, _error in [
('swh:1:cnt',
'Wrong format: There should be 4 mandatory values'),
('swh:1:',
'Wrong format: There should be 4 mandatory values'),
('swh:',
'Wrong format: There should be 4 mandatory values'),
('swh:1:cnt:',
'Wrong format: Identifier should be present'),
('foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505',
'Wrong format: Supported namespace is \'swh\''),
('swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505',
'Wrong format: Supported version is 1'),
('swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505',
'Wrong format: Supported types are %s' % (
', '.join(PERSISTENT_IDENTIFIER_TYPES))),
('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;'
'malformed',
'Contextual data is badly formatted, form key=val expected'),
('swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d',
'Wrong format: Identifier should be a valid hash'),
('swh:1:snp:foo',
'Wrong format: Identifier should be a valid hash')
]:
with self.assertRaisesRegex(
ValidationError, _error):
identifiers.parse_persistent_identifier(pid)
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(self.maximal)
),
git_object,
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(self.maximal).id,
hashlib.sha1(git_object).digest(),
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(self.maximal).id,
_x("f96966e1093d15236a31fde07e47d5b1c9428049"),
)
def test_nonascii_path(self):
metadata = {
**self.minimal,
"path": b"/ab\nc/d\xf0\x9f\xa4\xb7e\x00f",
}
git_object = (
b"raw_extrinsic_metadata 231\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
b"discovery_date 1611574071\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
b"path /ab\n"
b" c/d\xf0\x9f\xa4\xb7e\x00f\n"
b"\n"
b'{"foo": "bar"}'
)
class OriginIdentifier(unittest.TestCase):
def setUp(self):
self.origin = {
'url': 'https://github.com/torvalds/linux',
self.assertEqual(
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(metadata)
),
git_object,
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
hashlib.sha1(git_object).digest(),
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
_x("7cc83fd1912176510c083f5df43f01b09af4b333"),
)
def test_timezone_insensitive(self):
"""Checks the timezone of the datetime.datetime does not affect the
hashed git_object."""
utc_plus_one = datetime.timezone(datetime.timedelta(hours=1))
metadata = {
**self.minimal,
"discovery_date": datetime.datetime(
2021,
1,
25,
12,
27,
51,
tzinfo=utc_plus_one,
),
}
self.assertEqual(
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(self.minimal)
),
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(metadata)
),
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(self.minimal).id,
RawExtrinsicMetadata.from_dict(metadata).id,
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
_x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
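The discovery_date used here (12:27:51 at UTC+1) denotes the same instant as the minimal example's 11:27:51 UTC, and the git object only serializes the POSIX timestamp, so the identifiers match. A small stdlib-only sketch of that equivalence; the constant 1611574071 is the value serialized as discovery_date in test_minimal above:

import datetime

_utc = datetime.timezone.utc
_utc_plus_one = datetime.timezone(datetime.timedelta(hours=1))
_at_utc = datetime.datetime(2021, 1, 25, 11, 27, 51, tzinfo=_utc)
_at_plus_one = datetime.datetime(2021, 1, 25, 12, 27, 51, tzinfo=_utc_plus_one)
assert _at_utc == _at_plus_one                    # same instant
assert int(_at_utc.timestamp()) == 1611574071     # value written as discovery_date
assert int(_at_plus_one.timestamp()) == 1611574071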
def test_microsecond_insensitive(self):
"""Checks the microseconds of the datetime.datetime does not affect the
hashed manifest."""
metadata = {
**self.minimal,
"discovery_date": datetime.datetime(
2021,
1,
25,
11,
27,
51,
123456,
tzinfo=datetime.timezone.utc,
),
}
self.assertEqual(
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(self.minimal)
),
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(metadata)
),
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(self.minimal).id,
RawExtrinsicMetadata.from_dict(metadata).id,
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
_x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
def test_noninteger_timezone(self):
"""Checks the discovery_date is translated to UTC before truncating
microseconds"""
tz = datetime.timezone(datetime.timedelta(microseconds=-42))
metadata = {
**self.minimal,
"discovery_date": datetime.datetime(
2021,
1,
25,
11,
27,
50,
1_000_000 - 42,
tzinfo=tz,
),
}
self.assertEqual(
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(self.minimal)
),
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(metadata)
),
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(self.minimal).id,
RawExtrinsicMetadata.from_dict(metadata).id,
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
_x("5c13f20ba336e44549baf3d7b9305b027ec9f43d"),
)
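With an offset of -42 microseconds, the local time 11:27:50.999958 is exactly 11:27:51.000000 UTC, so converting to UTC before dropping microseconds reproduces the minimal example's timestamp, whereas truncating first would yield 11:27:50. A stdlib-only sketch of that arithmetic; the expected epoch value is the one from test_minimal:

import datetime

_tz = datetime.timezone(datetime.timedelta(microseconds=-42))
_local = datetime.datetime(2021, 1, 25, 11, 27, 50, 1_000_000 - 42, tzinfo=_tz)
_as_utc = _local.astimezone(datetime.timezone.utc)
assert _as_utc == datetime.datetime(
    2021, 1, 25, 11, 27, 51, 0, tzinfo=datetime.timezone.utc
)
assert int(_as_utc.timestamp()) == 1611574071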
def test_negative_timestamp(self):
metadata = {
**self.minimal,
"discovery_date": datetime.datetime(
1960,
1,
25,
11,
27,
51,
tzinfo=datetime.timezone.utc,
),
}
git_object = (
b"raw_extrinsic_metadata 210\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
b"discovery_date -313504329\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
b"\n"
b'{"foo": "bar"}'
)
self.assertEqual(
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(metadata)
),
git_object,
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
hashlib.sha1(git_object).digest(),
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
_x("895d0821a2991dd376ddc303424aceb7c68280f9"),
)
def test_epoch(self):
metadata = {
**self.minimal,
"discovery_date": datetime.datetime(
1970,
1,
1,
0,
0,
0,
tzinfo=datetime.timezone.utc,
),
}
git_object = (
b"raw_extrinsic_metadata 201\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
b"discovery_date 0\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
b"\n"
b'{"foo": "bar"}'
)
self.assertEqual(
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(metadata)
),
git_object,
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
hashlib.sha1(git_object).digest(),
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
_x("27a53df54ace35ebd910493cdc70b334d6b7cb88"),
)
def test_negative_epoch(self):
metadata = {
**self.minimal,
"discovery_date": datetime.datetime(
1969,
12,
31,
23,
59,
59,
1,
tzinfo=datetime.timezone.utc,
),
}
git_object = (
b"raw_extrinsic_metadata 202\0"
b"target swh:1:cnt:568aaf43d83b2c3df8067f3bedbb97d83260be6d\n"
b"discovery_date -1\n"
b"authority forge https://forge.softwareheritage.org/\n"
b"fetcher swh-phabricator-metadata-fetcher 0.0.1\n"
b"format json\n"
b"\n"
b'{"foo": "bar"}'
)
self.assertEqual(
git_objects.raw_extrinsic_metadata_git_object(
RawExtrinsicMetadata.from_dict(metadata)
),
git_object,
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
hashlib.sha1(git_object).digest(),
)
self.assertEqual(
RawExtrinsicMetadata.from_dict(metadata).id,
_x("be7154a8fd49d87f81547ea634d1e2152907d089"),
)
origin_example = {
"url": "https://github.com/torvalds/linux",
}
class OriginIdentifier(unittest.TestCase):
def test_content_identifier(self):
self.assertEqual(identifiers.origin_identifier(self.origin),
'b63a575fe3faab7692c9f38fb09d4bb45651bb0f')
self.assertEqual(
Origin.from_dict(origin_example).id,
_x("b63a575fe3faab7692c9f38fb09d4bb45651bb0f"),
)
# Format: [
# (
# input1,
# expected_output1,
# ),
# (
# input2,
# expected_output2,
# ),
# ...
# ]
TS_DICTS = [
# with current input dict format (offset_bytes)
(
{"timestamp": 12345, "offset_bytes": b"+0000"},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"+0000",
},
),
(
{"timestamp": 12345, "offset_bytes": b"-0000"},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"-0000",
},
),
(
{"timestamp": 12345, "offset_bytes": b"+0200"},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"+0200",
},
),
(
{"timestamp": 12345, "offset_bytes": b"-0200"},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"-0200",
},
),
(
{"timestamp": 12345, "offset_bytes": b"--700"},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"--700",
},
),
(
{"timestamp": 12345, "offset_bytes": b"1234567"},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"1234567",
},
),
# with old-style input dicts (numeric offset + optional negative_utc):
(
{"timestamp": 12345, "offset": 0},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"+0000",
},
),
(
{"timestamp": 12345, "offset": 0, "negative_utc": False},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"+0000",
},
),
(
{"timestamp": 12345, "offset": 0, "negative_utc": False},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"+0000",
},
),
(
{"timestamp": 12345, "offset": 0, "negative_utc": None},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"+0000",
},
),
(
{"timestamp": {"seconds": 12345}, "offset": 0, "negative_utc": None},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"+0000",
},
),
(
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset": 0,
"negative_utc": None,
},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"+0000",
},
),
(
{
"timestamp": {"seconds": 12345, "microseconds": 100},
"offset": 0,
"negative_utc": None,
},
{
"timestamp": {"seconds": 12345, "microseconds": 100},
"offset_bytes": b"+0000",
},
),
(
{"timestamp": 12345, "offset": 0, "negative_utc": True},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"-0000",
},
),
(
{"timestamp": 12345, "offset": 0, "negative_utc": None},
{
"timestamp": {"seconds": 12345, "microseconds": 0},
"offset_bytes": b"+0000",
},
),
]
@pytest.mark.parametrize("dict_input,expected", TS_DICTS)
def test_normalize_timestamp_dict(dict_input, expected):
assert TimestampWithTimezone.from_dict(dict_input).to_dict() == expected
TS_DICTS_INVALID_TIMESTAMP = [
{"timestamp": 1.2, "offset": 0},
{"timestamp": "1", "offset": 0},
# these below should really also trigger a ValueError...
# {"timestamp": {"seconds": "1"}, "offset": 0},
# {"timestamp": {"seconds": 1.2}, "offset": 0},
# {"timestamp": {"seconds": 1.2}, "offset": 0},
]
@pytest.mark.parametrize("dict_input", TS_DICTS_INVALID_TIMESTAMP)
def test_normalize_timestamp_dict_invalid_timestamp(dict_input):
with pytest.raises(ValueError, match="non-integer timestamp"):
TimestampWithTimezone.from_dict(dict_input)
UTC = datetime.timezone.utc
TS_TIMEZONES = [
datetime.timezone.min,
datetime.timezone(datetime.timedelta(hours=-1)),
UTC,
datetime.timezone(datetime.timedelta(minutes=+60)),
datetime.timezone.max,
]
TS_TZ_EXPECTED = [-1439, -60, 0, 60, 1439]
TS_TZ_BYTES_EXPECTED = [b"-2359", b"-0100", b"+0000", b"+0100", b"+2359"]
TS_DATETIMES = [
datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=UTC),
datetime.datetime(2120, 12, 31, 23, 59, 59, tzinfo=UTC),
datetime.datetime(1610, 5, 14, 15, 43, 0, tzinfo=UTC),
]
TS_DT_EXPECTED = [1582814359, 4765132799, -11348929020]
@pytest.mark.parametrize("date, seconds", zip(TS_DATETIMES, TS_DT_EXPECTED))
@pytest.mark.parametrize(
"tz, offset, offset_bytes", zip(TS_TIMEZONES, TS_TZ_EXPECTED, TS_TZ_BYTES_EXPECTED)
)
@pytest.mark.parametrize("microsecond", [0, 1, 10, 100, 1000, 999999])
def test_normalize_timestamp_datetime(
date, seconds, tz, offset, offset_bytes, microsecond
):
date = date.astimezone(tz).replace(microsecond=microsecond)
assert TimestampWithTimezone.from_dict(date).to_dict() == {
"timestamp": {"seconds": seconds, "microseconds": microsecond},
"offset_bytes": offset_bytes,
}
def test_extid_identifier_bwcompat():
extid_dict = {
"extid_type": "test-type",
"extid": b"extid",
"target": "swh:1:dir:" + "00" * 20,
}
assert ExtID.from_dict(extid_dict).id == _x(
"b9295e1931c31e40a7e3e1e967decd1c89426455"
)
assert (
ExtID.from_dict({**extid_dict, "extid_version": 0}).id
== ExtID.from_dict(extid_dict).id
)
assert (
ExtID.from_dict({**extid_dict, "extid_version": 1}).id
!= ExtID.from_dict(extid_dict).id
)
assert (
ExtID.from_dict(
{
**extid_dict,
"payload_type": "test",
"payload": bytes.fromhex("257cc5642cb1a054f08cc83f2d943e56fd3ebe99"),
}
).id
!= ExtID.from_dict(extid_dict).id
)
# Copyright (C) 2017 The Software Heritage developers
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,28 +9,20 @@ from swh.model import merkle
class MerkleTestNode(merkle.MerkleNode):
type = 'tested_merkle_node_type'
object_type = "tested_merkle_node_type"
def __init__(self, data):
super().__init__(data)
self.compute_hash_called = 0
def compute_hash(self):
def compute_hash(self) -> bytes:
self.compute_hash_called += 1
child_data = [
child + b'=' + self[child].hash
for child in sorted(self)
]
return (
b'hash('
+ b', '.join([self.data['value']] + child_data)
+ b')'
)
child_data = [child + b"=" + self[child].hash for child in sorted(self)]
return b"hash(" + b", ".join([self.data.get("value", b"")] + child_data) + b")"
class MerkleTestLeaf(merkle.MerkleLeaf):
type = 'tested_merkle_leaf_type'
object_type = "tested_merkle_leaf_type"
def __init__(self, data):
super().__init__(data)
@@ -38,14 +30,22 @@ class MerkleTestLeaf(merkle.MerkleLeaf):
def compute_hash(self):
self.compute_hash_called += 1
return b'hash(' + self.data['value'] + b')'
return b"hash(" + self.data.get("value", b"") + b")"
class TestMerkleLeaf(unittest.TestCase):
def setUp(self):
self.data = {'value': b'value'}
self.data = {"value": b"value"}
self.instance = MerkleTestLeaf(self.data)
def test_equality(self):
leaf1 = MerkleTestLeaf(self.data)
leaf2 = MerkleTestLeaf(self.data)
leaf3 = MerkleTestLeaf({})
self.assertEqual(leaf1, leaf2)
self.assertNotEqual(leaf1, leaf3)
def test_hash(self):
self.assertEqual(self.instance.compute_hash_called, 0)
instance_hash = self.instance.hash
@@ -60,29 +60,26 @@ class TestMerkleLeaf(unittest.TestCase):
def test_collect(self):
collected = self.instance.collect()
self.assertEqual(
collected, {
self.instance.type: {
self.instance.hash: self.instance.get_data(),
},
},
collected,
{self.instance},
)
collected2 = self.instance.collect()
self.assertEqual(collected2, {})
self.assertEqual(collected2, set())
self.instance.reset_collect()
collected3 = self.instance.collect()
self.assertEqual(collected, collected3)
def test_leaf(self):
with self.assertRaisesRegex(ValueError, 'is a leaf'):
self.instance[b'key1'] = 'Test'
with self.assertRaisesRegex(ValueError, "is a leaf"):
self.instance[b"key1"] = "Test"
with self.assertRaisesRegex(ValueError, 'is a leaf'):
del self.instance[b'key1']
with self.assertRaisesRegex(ValueError, "is a leaf"):
del self.instance[b"key1"]
with self.assertRaisesRegex(ValueError, 'is a leaf'):
self.instance[b'key1']
with self.assertRaisesRegex(ValueError, "is a leaf"):
self.instance[b"key1"]
with self.assertRaisesRegex(ValueError, 'is a leaf'):
with self.assertRaisesRegex(ValueError, "is a leaf"):
self.instance.update(self.data)
@@ -90,30 +87,50 @@ class TestMerkleNode(unittest.TestCase):
maxDiff = None
def setUp(self):
self.root = MerkleTestNode({'value': b'root'})
self.nodes = {b'root': self.root}
for i in (b'a', b'b', b'c'):
value = b'root/' + i
node = MerkleTestNode({
'value': value,
})
self.root = MerkleTestNode({"value": b"root"})
self.nodes = {b"root": self.root}
for i in (b"a", b"b", b"c"):
value = b"root/" + i
node = MerkleTestNode(
{
"value": value,
}
)
self.root[i] = node
self.nodes[value] = node
for j in (b'a', b'b', b'c'):
value2 = value + b'/' + j
node2 = MerkleTestNode({
'value': value2,
})
for j in (b"a", b"b", b"c"):
value2 = value + b"/" + j
node2 = MerkleTestNode(
{
"value": value2,
}
)
node[j] = node2
self.nodes[value2] = node2
for k in (b'a', b'b', b'c'):
value3 = value2 + b'/' + j
node3 = MerkleTestNode({
'value': value3,
})
for k in (b"a", b"b", b"c"):
value3 = value2 + b"/" + j
node3 = MerkleTestNode(
{
"value": value3,
}
)
node2[j] = node3
self.nodes[value3] = node3
def test_equality(self):
node1 = MerkleTestNode({"value": b"bar"})
node2 = MerkleTestNode({"value": b"bar"})
node3 = MerkleTestNode({})
self.assertEqual(node1, node2)
self.assertNotEqual(node1, node3, node1 == node3)
node1[b"a"] = node3
self.assertNotEqual(node1, node2)
node2[b"a"] = node3
self.assertEqual(node1, node2)
def test_hash(self):
for node in self.nodes.values():
self.assertEqual(node.compute_hash_called, 0)
@@ -122,7 +139,7 @@ class TestMerkleNode(unittest.TestCase):
hash = self.root.hash
for node in self.nodes.values():
self.assertEqual(node.compute_hash_called, 1)
self.assertIn(node.data['value'], hash)
self.assertIn(node.data["value"], hash)
# Should use the cached value
hash2 = self.root.hash
@@ -137,10 +154,10 @@ class TestMerkleNode(unittest.TestCase):
self.assertEqual(node.compute_hash_called, 1)
# Force update of the cached value for a deeply nested node
self.root[b'a'][b'b'].update_hash(force=True)
self.root[b"a"][b"b"].update_hash(force=True)
for key, node in self.nodes.items():
# update_hash rehashes all children
if key.startswith(b'root/a/b'):
if key.startswith(b"root/a/b"):
self.assertEqual(node.compute_hash_called, 2)
else:
self.assertEqual(node.compute_hash_called, 1)
@@ -149,81 +166,97 @@ class TestMerkleNode(unittest.TestCase):
self.assertEqual(hash, hash4)
for key, node in self.nodes.items():
# update_hash also invalidates all parents
if key in (b'root', b'root/a') or key.startswith(b'root/a/b'):
if key in (b"root", b"root/a") or key.startswith(b"root/a/b"):
self.assertEqual(node.compute_hash_called, 2)
else:
self.assertEqual(node.compute_hash_called, 1)
def test_collect(self):
collected = self.root.collect()
self.assertEqual(len(collected[self.root.type]), len(self.nodes))
self.assertEqual(collected, set(self.nodes.values()))
for node in self.nodes.values():
self.assertTrue(node.collected)
collected2 = self.root.collect()
self.assertEqual(collected2, {})
self.assertEqual(collected2, set())
def test_iter_tree_with_deduplication(self):
nodes = list(self.root.iter_tree())
self.assertCountEqual(nodes, self.nodes.values())
def test_iter_tree_without_deduplication(self):
# duplicate existing hash in merkle tree
self.root[b"d"] = MerkleTestNode({"value": b"root/c/c/c"})
nodes_dedup = list(self.root.iter_tree())
nodes = list(self.root.iter_tree(dedup=False))
assert nodes != nodes_dedup
assert len(nodes) == len(nodes_dedup) + 1
def test_get(self):
for key in (b'a', b'b', b'c'):
self.assertEqual(self.root[key], self.nodes[b'root/' + key])
for key in (b"a", b"b", b"c"):
self.assertEqual(self.root[key], self.nodes[b"root/" + key])
with self.assertRaisesRegex(KeyError, "b'nonexistent'"):
self.root[b'nonexistent']
self.root[b"nonexistent"]
def test_del(self):
hash_root = self.root.hash
hash_a = self.nodes[b'root/a'].hash
del self.root[b'a'][b'c']
hash_a = self.nodes[b"root/a"].hash
del self.root[b"a"][b"c"]
hash_root2 = self.root.hash
hash_a2 = self.nodes[b'root/a'].hash
hash_a2 = self.nodes[b"root/a"].hash
self.assertNotEqual(hash_root, hash_root2)
self.assertNotEqual(hash_a, hash_a2)
self.assertEqual(self.nodes[b'root/a/c'].parents, [])
self.assertEqual(self.nodes[b"root/a/c"].parents, [])
with self.assertRaisesRegex(KeyError, "b'nonexistent'"):
del self.root[b'nonexistent']
del self.root[b"nonexistent"]
def test_update(self):
hash_root = self.root.hash
hash_b = self.root[b'b'].hash
hash_b = self.root[b"b"].hash
new_children = {
b'c': MerkleTestNode({'value': b'root/b/new_c'}),
b'd': MerkleTestNode({'value': b'root/b/d'}),
b"c": MerkleTestNode({"value": b"root/b/new_c"}),
b"d": MerkleTestNode({"value": b"root/b/d"}),
}
# collect all nodes
self.root.collect()
self.root[b'b'].update(new_children)
self.root[b"b"].update(new_children)
# Ensure everyone got reparented
self.assertEqual(new_children[b'c'].parents, [self.root[b'b']])
self.assertEqual(new_children[b'd'].parents, [self.root[b'b']])
self.assertEqual(self.nodes[b'root/b/c'].parents, [])
self.assertEqual(new_children[b"c"].parents, [self.root[b"b"]])
self.assertEqual(new_children[b"d"].parents, [self.root[b"b"]])
self.assertEqual(self.nodes[b"root/b/c"].parents, [])
hash_root2 = self.root.hash
self.assertNotEqual(hash_root, hash_root2)
self.assertIn(b'root/b/new_c', hash_root2)
self.assertIn(b'root/b/d', hash_root2)
self.assertIn(b"root/b/new_c", hash_root2)
self.assertIn(b"root/b/d", hash_root2)
hash_b2 = self.root[b'b'].hash
hash_b2 = self.root[b"b"].hash
self.assertNotEqual(hash_b, hash_b2)
for key, node in self.nodes.items():
if key in (b'root', b'root/b'):
if key in (b"root", b"root/b"):
self.assertEqual(node.compute_hash_called, 2)
else:
self.assertEqual(node.compute_hash_called, 1)
# Ensure we collected root, root/b, and both new children
collected_after_update = self.root.collect()
self.assertCountEqual(
collected_after_update[MerkleTestNode.type],
[self.nodes[b'root'].hash, self.nodes[b'root/b'].hash,
new_children[b'c'].hash, new_children[b'd'].hash],
self.assertEqual(
collected_after_update,
{
self.nodes[b"root"],
self.nodes[b"root/b"],
new_children[b"c"],
new_children[b"d"],
},
)
# test that noop updates don't invalidate anything
self.root[b'a'][b'b'].update({})
self.assertEqual(self.root.collect(), {})
self.root[b"a"][b"b"].update({})
self.assertEqual(self.root.collect(), set())
# Copyright (C) 2019 The Software Heritage developers
# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import collections
import copy
import datetime
import hashlib
import re
from typing import Any, List, Optional, Tuple, Union
import attr
from attrs_strict import AttributeTypeError
import dateutil
from hypothesis import given
from hypothesis.strategies import binary, none
import pytest
from swh.model.hypothesis_strategies import objects
from swh.model.collections import ImmutableDict
from swh.model.from_disk import DentryPerms
import swh.model.git_objects
from swh.model.hashutil import MultiHash, hash_to_bytes
import swh.model.hypothesis_strategies as strategies
import swh.model.model
from swh.model.model import (
BaseModel,
Content,
Directory,
DirectoryEntry,
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
MissingData,
ModelObjectType,
Origin,
OriginVisit,
OriginVisitStatus,
Person,
RawExtrinsicMetadata,
Release,
Revision,
SkippedContent,
Snapshot,
SnapshotBranch,
SnapshotTargetType,
Timestamp,
TimestampOverflowException,
TimestampWithTimezone,
optimized_validator,
)
import swh.model.swhids
from swh.model.swhids import CoreSWHID, ExtendedSWHID, ObjectType
from swh.model.tests.swh_model_data import TEST_OBJECTS
from swh.model.tests.test_identifiers import (
TS_DATETIMES,
TS_TIMEZONES,
directory_example,
metadata_example,
release_example,
revision_example,
snapshot_example,
)
EXAMPLE_HASH = hash_to_bytes("94a9ed024d3859793618152ea559a168bbcbb5e2")
@given(objects())
@given(
strategies.objects(
blacklist_types={
ModelObjectType.ORIGIN,
ModelObjectType.ORIGIN_VISIT,
ModelObjectType.ORIGIN_VISIT_STATUS,
}
)
)
def test_todict_inverse_fromdict(objtype_and_obj):
(obj_type, obj) = objtype_and_obj
obj_as_dict = obj.to_dict()
obj_as_dict_copy = copy.deepcopy(obj_as_dict)
@@ -24,3 +88,2184 @@ def test_todict_inverse_fromdict(objtype_and_obj):
# Check the composition of from_dict and to_dict is the identity
assert obj_as_dict == type(obj).from_dict(obj_as_dict).to_dict()
# In some cases, python-dateutil builds a `tzfile` object from the content of
# a tarball. In such cases the `tzfile._filename` attribute refers to the
# filepath within the tarball, making the __repr__ unusable. We work around
# this by replacing the tzfile with a gettz call, as the filename matches the
# timezone identifier.
#
# We detect the bogus tzfile __repr__ by checking whether the path is
# absolute: if it is not, we are in the tarball case.
RE_FIX_TZ_FILE = re.compile(r"tzfile\('([^/][^']*)'\)")
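# Example of the fix-up (illustrative): a repr containing tzfile('Europe/Paris')
# (a relative path, i.e. the tarball case) would be rewritten to
# gettz('Europe/Paris'), while tzfile('/usr/share/zoneinfo/Europe/Paris') is
# left untouched because its path is absolute.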
@given(strategies.objects())
def test_repr(objtype_and_obj):
"""Checks every model object has a working repr(), and that it can be eval()uated
(so that printed objects can be copy-pasted to write test cases.)"""
(obj_type, obj) = objtype_and_obj
r = repr(obj)
env = {
"tzutc": lambda: datetime.timezone.utc,
"tzfile": dateutil.tz.tzfile,
"gettz": dateutil.tz.gettz,
"hash_to_bytes": hash_to_bytes,
**swh.model.swhids.__dict__,
**swh.model.model.__dict__,
}
# replace bogus tzfile __repr__ on the fly
r = RE_FIX_TZ_FILE.sub(r"gettz('\1')", r)
assert eval(r, env) == obj
@attr.s
class Cls1:
pass
@attr.s
class Cls2(Cls1):
pass
_custom_namedtuple = collections.namedtuple("_custom_namedtuple", "a b")
class _custom_tuple(tuple):
pass
# List of (type, valid_values, invalid_values)
_TYPE_VALIDATOR_PARAMETERS: List[Tuple[Any, List[Any], List[Any]]] = [
# base types:
(
bool,
[True, False],
[-1, 0, 1, 42, 1000, None, "123", 0.0, (), ("foo",), ImmutableDict()],
),
(
int,
[-1, 0, 1, 42, 1000, DentryPerms.directory, True, False],
[None, "123", 0.0, (), ImmutableDict()],
),
(
float,
[-1.0, 0.0, 1.0, float("infinity"), float("NaN")],
[True, False, None, 1, "1.2", (), ImmutableDict()],
),
(
bytes,
[b"", b"123"],
[None, bytearray(b"\x12\x34"), "123", 0, 123, (), (1, 2, 3), ImmutableDict()],
),
(str, ["", "123"], [None, b"123", b"", 0, (), (1, 2, 3), ImmutableDict()]),
(None, [None], [b"", b"123", "", "foo", 0, 123, ImmutableDict(), float("NaN")]),
# unions:
(
Optional[int],
[None, -1, 0, 1, 42, 1000, DentryPerms.directory],
["123", 0.0, (), ImmutableDict()],
),
(
Optional[bytes],
[None, b"", b"123"],
["123", "", 0, (), (1, 2, 3), ImmutableDict()],
),
(
Union[str, bytes],
["", "123", b"123", b""],
[None, 0, (), (1, 2, 3), ImmutableDict()],
),
(
Union[str, bytes, None],
["", "123", b"123", b"", None],
[0, (), (1, 2, 3), ImmutableDict()],
),
# tuples
(
Tuple[str, str],
[("foo", "bar"), ("", ""), _custom_namedtuple("", ""), _custom_tuple(("", ""))],
[("foo",), ("foo", "bar", "baz"), ("foo", 42), (42, "foo")],
),
(
Tuple[bytes, bytes],
[
(b"foo", b"bar"),
(b"", b""),
_custom_namedtuple(b"", b""),
_custom_tuple((b"", b"")),
],
[(b"foo",), (b"foo", b"bar", b"baz"), (b"foo", 42), (42, b"foo")],
),
(
Tuple[str, ...],
[
("foo",),
("foo", "bar"),
("", ""),
("foo", "bar", "baz"),
_custom_namedtuple("", ""),
_custom_tuple(("", "")),
],
[("foo", 42), (42, "foo")],
),
# composite generic:
(
Tuple[Union[str, int], Union[str, int]],
[("foo", "foo"), ("foo", 42), (42, "foo"), (42, 42)],
[("foo", b"bar"), (b"bar", "foo")],
),
(
Union[Tuple[str, str], Tuple[int, int]],
[("foo", "foo"), (42, 42)],
[("foo", b"bar"), (b"bar", "foo"), ("foo", 42), (42, "foo")],
),
(
Tuple[Tuple[bytes, bytes], ...],
[(), ((b"foo", b"bar"),), ((b"foo", b"bar"), (b"baz", b"qux"))],
[((b"foo", "bar"),), ((b"foo", b"bar"), ("baz", b"qux"))],
),
# standard types:
(
datetime.datetime,
[
datetime.datetime(2021, 12, 15, 12, 59, 27),
datetime.datetime(2021, 12, 15, 12, 59, 27, tzinfo=datetime.timezone.utc),
],
[None, 123],
),
# ImmutableDict
(
ImmutableDict[str, int],
[
ImmutableDict(),
ImmutableDict({"foo": 42}),
ImmutableDict({"foo": 42, "bar": 123}),
],
[ImmutableDict({"foo": "bar"}), ImmutableDict({42: 123})],
),
# Any:
(
object,
[-1, 0, 1, 42, 1000, None, "123", 0.0, (), ImmutableDict()],
[],
),
(
Any,
[-1, 0, 1, 42, 1000, None, "123", 0.0, (), ImmutableDict()],
[],
),
(
ImmutableDict[Any, int],
[
ImmutableDict(),
ImmutableDict({"foo": 42}),
ImmutableDict({"foo": 42, "bar": 123}),
ImmutableDict({42: 123}),
],
[ImmutableDict({"foo": "bar"})],
),
(
ImmutableDict[str, Any],
[
ImmutableDict(),
ImmutableDict({"foo": 42}),
ImmutableDict({"foo": "bar"}),
ImmutableDict({"foo": 42, "bar": 123}),
],
[ImmutableDict({42: 123})],
),
# attr objects:
(
Timestamp,
[
Timestamp(seconds=123, microseconds=0),
],
[None, "2021-09-28T11:27:59", 123],
),
(
Cls1,
[Cls1(), Cls2()],
[None, b"abcd"],
),
# enums:
(
SnapshotTargetType,
[SnapshotTargetType.CONTENT, SnapshotTargetType.ALIAS],
["content", "alias", 123, None],
),
]
@pytest.mark.parametrize(
"type_,value",
[
pytest.param(type_, value, id=f"type={type_}, value={value}")
for (type_, values, _) in _TYPE_VALIDATOR_PARAMETERS
for value in values
],
)
def test_optimized_type_validator_valid(type_, value):
validator = optimized_validator(type_)
validator(None, attr.ib(type=type_), value)
@pytest.mark.parametrize(
"type_,value",
[
pytest.param(type_, value, id=f"type={type_}, value={value}")
for (type_, _, values) in _TYPE_VALIDATOR_PARAMETERS
for value in values
],
)
def test_optimized_type_validator_invalid(type_, value):
validator = optimized_validator(type_)
with pytest.raises(AttributeTypeError):
validator(None, attr.ib(type=type_), value)
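# Minimal usage sketch mirroring the two tests above (illustrative): the
# validator produced by optimized_validator() is called with the attrs
# (instance, attribute, value) signature and raises AttributeTypeError on a
# type mismatch, e.g.:
#
#   validator = optimized_validator(Optional[int])
#   validator(None, attr.ib(type=Optional[int]), 42)     # accepted
#   validator(None, attr.ib(type=Optional[int]), "42")   # raises AttributeTypeError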
@pytest.mark.parametrize("object_type, objects", TEST_OBJECTS.items())
def test_swh_model_todict_fromdict(object_type, objects):
"""checks model objects in swh_model_data are in correct shape"""
assert objects
for obj in objects:
# Check the composition of from_dict and to_dict is the identity
obj_as_dict = obj.to_dict()
assert obj == type(obj).from_dict(obj_as_dict)
assert obj_as_dict == type(obj).from_dict(obj_as_dict).to_dict()
def test_unique_key():
url = "http://example.org/"
date = datetime.datetime.now(tz=datetime.timezone.utc)
id_ = b"42" * 10
assert Origin(url=url).unique_key() == {"url": url}
assert OriginVisit(origin=url, date=date, type="git").unique_key() == {
"origin": url,
"date": str(date),
}
assert OriginVisitStatus(
origin=url, visit=42, date=date, status="created", snapshot=None
).unique_key() == {
"origin": url,
"visit": "42",
"date": str(date),
}
assert Snapshot.from_dict({**snapshot_example, "id": id_}).unique_key() == id_
assert Release.from_dict({**release_example, "id": id_}).unique_key() == id_
assert Revision.from_dict({**revision_example, "id": id_}).unique_key() == id_
assert Directory.from_dict({**directory_example, "id": id_}).unique_key() == id_
assert (
RawExtrinsicMetadata.from_dict({**metadata_example, "id": id_}).unique_key()
== id_
)
cont = Content.from_data(b"foo")
assert cont.unique_key().hex() == "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"
kwargs = {
**cont.to_dict(),
"reason": "foo",
"status": "absent",
}
del kwargs["data"]
assert SkippedContent(**kwargs).unique_key() == cont.hashes()
# Anonymization
@given(strategies.objects())
def test_anonymization(objtype_and_obj):
(obj_type, obj) = objtype_and_obj
def check_person(p):
if p is not None:
assert p.name is None
assert p.email is None
assert len(p.fullname) == 32
anon_obj = obj.anonymize()
if obj_type == ModelObjectType.PERSON:
assert anon_obj is not None
check_person(anon_obj)
elif obj_type == ModelObjectType.RELEASE:
assert anon_obj is not None
check_person(anon_obj.author)
elif obj_type == ModelObjectType.REVISION:
assert anon_obj is not None
check_person(anon_obj.author)
check_person(anon_obj.committer)
else:
assert anon_obj is None
# Origin, OriginVisit, OriginVisitStatus
@given(strategies.origins())
def test_todict_origins(origin):
obj = origin.to_dict()
assert "type" not in obj
assert type(origin)(url=origin.url) == type(origin).from_dict(obj)
def test_origin_long_url():
with pytest.raises(ValueError, match="Origin URL is too long"):
Origin(url="https://" + "a" * 3000)
with pytest.raises(ValueError, match="Origin URL is too long"):
Origin(url="https://example.org/" + "a" * 3050)
@given(strategies.origin_visits())
def test_todict_origin_visits(origin_visit):
obj = origin_visit.to_dict()
assert origin_visit == type(origin_visit).from_dict(obj)
def test_origin_visit_naive_datetime():
with pytest.raises(ValueError, match="must be a timezone-aware datetime"):
OriginVisit(
origin="http://foo/",
date=datetime.datetime.now(),
type="git",
)
@given(strategies.origin_visit_statuses())
def test_todict_origin_visit_statuses(origin_visit_status):
obj = origin_visit_status.to_dict()
assert origin_visit_status == type(origin_visit_status).from_dict(obj)
def test_origin_visit_status_naive_datetime():
with pytest.raises(ValueError, match="must be a timezone-aware datetime"):
OriginVisitStatus(
origin="http://foo/",
visit=42,
date=datetime.datetime.now(),
status="ongoing",
snapshot=None,
)
@pytest.fixture
def origin_visit_status_example():
tz = datetime.timezone(datetime.timedelta(minutes=+60))
return OriginVisitStatus(
origin="http://foo/",
visit=42,
date=datetime.datetime.now(tz=tz),
status="full",
snapshot=hash_to_bytes("6e65b86363953b780d92b0a928f3e8fcdd10db36"),
)
def test_origin_visit_status_snapshot_swhid(origin_visit_status_example):
assert origin_visit_status_example.snapshot_swhid() == CoreSWHID.from_string(
"swh:1:snp:6e65b86363953b780d92b0a928f3e8fcdd10db36"
)
def test_origin_visit_status_origin_swhid(origin_visit_status_example):
assert origin_visit_status_example.origin_swhid() == ExtendedSWHID.from_string(
"swh:1:ori:e0cee4b024ab93b037a1c182865942f5430c6fa4"
)
# Timestamp
@given(strategies.timestamps())
def test_timestamps_strategy(timestamp):
attr.validate(timestamp)
def test_timestamp_seconds():
attr.validate(Timestamp(seconds=0, microseconds=0))
with pytest.raises(AttributeTypeError):
Timestamp(seconds="0", microseconds=0)
attr.validate(
Timestamp(
seconds=Timestamp.MAX_SECONDS, microseconds=Timestamp.MAX_MICROSECONDS
)
)
attr.validate(
Timestamp(
seconds=Timestamp.MIN_SECONDS, microseconds=Timestamp.MIN_MICROSECONDS
)
)
with pytest.raises(TimestampOverflowException):
attr.validate(
Timestamp(
seconds=Timestamp.MAX_SECONDS + 1,
microseconds=Timestamp.MAX_MICROSECONDS,
)
)
with pytest.raises(TimestampOverflowException):
attr.validate(
Timestamp(
seconds=Timestamp.MIN_SECONDS - 1,
microseconds=Timestamp.MIN_MICROSECONDS,
)
)
with pytest.raises(TimestampOverflowException):
attr.validate(Timestamp(seconds=2**63 - 1, microseconds=0))
with pytest.raises(ValueError):
Timestamp(seconds=2**63, microseconds=0)
with pytest.raises(TimestampOverflowException):
attr.validate(Timestamp(seconds=-(2**63), microseconds=0))
with pytest.raises(TimestampOverflowException):
Timestamp(seconds=-(2**63) - 1, microseconds=0)
def test_timestamp_microseconds():
attr.validate(Timestamp(seconds=0, microseconds=0))
with pytest.raises(AttributeTypeError):
Timestamp(seconds=0, microseconds="0")
with pytest.raises(ValueError):
attr.validate(
Timestamp(
seconds=Timestamp.MAX_SECONDS,
microseconds=Timestamp.MAX_MICROSECONDS + 1,
)
)
with pytest.raises(ValueError):
attr.validate(Timestamp(seconds=0, microseconds=Timestamp.MAX_MICROSECONDS + 1))
with pytest.raises(ValueError):
attr.validate(Timestamp(seconds=0, microseconds=Timestamp.MIN_MICROSECONDS - 1))
with pytest.raises(ValueError):
attr.validate(
Timestamp(
seconds=Timestamp.MIN_SECONDS,
microseconds=Timestamp.MIN_MICROSECONDS - 1,
)
)
def test_timestamp_from_dict():
assert Timestamp.from_dict({"seconds": 10, "microseconds": 5})
with pytest.raises(AttributeTypeError):
Timestamp.from_dict({"seconds": "10", "microseconds": 5})
with pytest.raises(AttributeTypeError):
Timestamp.from_dict({"seconds": 10, "microseconds": "5"})
with pytest.raises(ValueError):
Timestamp.from_dict({"seconds": 0, "microseconds": -1})
Timestamp.from_dict({"seconds": 0, "microseconds": 10**6 - 1})
with pytest.raises(ValueError):
Timestamp.from_dict({"seconds": 0, "microseconds": 10**6})
# TimestampWithTimezone
def test_timestampwithtimezone():
ts = Timestamp(seconds=0, microseconds=0)
tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0000")
attr.validate(tstz)
assert tstz.offset_minutes() == 0
assert tstz.offset_bytes == b"+0000"
tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+0010")
attr.validate(tstz)
assert tstz.offset_minutes() == 10
assert tstz.offset_bytes == b"+0010"
tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"-0010")
attr.validate(tstz)
assert tstz.offset_minutes() == -10
assert tstz.offset_bytes == b"-0010"
tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"-0000")
attr.validate(tstz)
assert tstz.offset_minutes() == 0
assert tstz.offset_bytes == b"-0000"
tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"-1030")
attr.validate(tstz)
assert tstz.offset_minutes() == -630
assert tstz.offset_bytes == b"-1030"
tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+1320")
attr.validate(tstz)
assert tstz.offset_minutes() == 800
assert tstz.offset_bytes == b"+1320"
tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+200")
attr.validate(tstz)
assert tstz.offset_minutes() == 120
assert tstz.offset_bytes == b"+200"
tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+02")
attr.validate(tstz)
assert tstz.offset_minutes() == 120
assert tstz.offset_bytes == b"+02"
tstz = TimestampWithTimezone(timestamp=ts, offset_bytes=b"+2000000000")
attr.validate(tstz)
assert tstz.offset_minutes() == 0
assert tstz.offset_bytes == b"+2000000000"
with pytest.raises(AttributeTypeError):
TimestampWithTimezone(timestamp=datetime.datetime.now(), offset_bytes=b"+0000")
with pytest.raises((AttributeTypeError, TypeError)):
TimestampWithTimezone(timestamp=ts, offset_bytes=0)
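# Illustrative summary of the offset_bytes parsing exercised above (an
# assumption drawn from these assertions, not from a spec): canonical b"+HHMM"
# / b"-HHMM" offsets split into hours and minutes, shorter forms such as
# b"+200" and b"+02" still read as +2 hours, and values that do not fit the
# scheme, like b"+2000000000", are kept verbatim in offset_bytes but count as
# an offset of 0 minutes.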
def test_timestampwithtimezone_from_datetime():
# Typical case
tz = datetime.timezone(datetime.timedelta(minutes=+60))
date = datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=tz)
tstz = TimestampWithTimezone.from_datetime(date)
assert tstz == TimestampWithTimezone(
timestamp=Timestamp(
seconds=1582810759,
microseconds=0,
),
offset_bytes=b"+0100",
)
# Typical case (close to epoch)
tz = datetime.timezone(datetime.timedelta(minutes=+60))
date = datetime.datetime(1970, 1, 1, 1, 0, 5, tzinfo=tz)
tstz = TimestampWithTimezone.from_datetime(date)
assert tstz == TimestampWithTimezone(
timestamp=Timestamp(
seconds=5,
microseconds=0,
),
offset_bytes=b"+0100",
)
# non-integer number of seconds before UNIX epoch
date = datetime.datetime(
1969, 12, 31, 23, 59, 59, 100000, tzinfo=datetime.timezone.utc
)
tstz = TimestampWithTimezone.from_datetime(date)
assert tstz == TimestampWithTimezone(
timestamp=Timestamp(
seconds=-1,
microseconds=100000,
),
offset_bytes=b"+0000",
)
# non-integer number of seconds in both the timestamp and the offset
tz = datetime.timezone(datetime.timedelta(microseconds=-600000))
date = datetime.datetime(1969, 12, 31, 23, 59, 59, 600000, tzinfo=tz)
tstz = TimestampWithTimezone.from_datetime(date)
assert tstz == TimestampWithTimezone(
timestamp=Timestamp(
seconds=0,
microseconds=200000,
),
offset_bytes=b"+0000",
)
    # timezone offset with non-integer number of seconds, for dates before epoch
    # we round down to the previous second, so it should be the same as
    # 1969-12-31T23:59:59.100000Z
tz = datetime.timezone(datetime.timedelta(microseconds=900000))
date = datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=tz)
tstz = TimestampWithTimezone.from_datetime(date)
assert tstz == TimestampWithTimezone(
timestamp=Timestamp(
seconds=-1,
microseconds=100000,
),
offset_bytes=b"+0000",
)
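# Worked arithmetic for the last case above (illustrative): 1970-01-01T00:00:00
# at a +0.9 s offset is 1969-12-31T23:59:59.100000 UTC, i.e.
# Timestamp(seconds=-1, microseconds=100000), and the sub-minute offset itself
# collapses to b"+0000".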
def test_timestampwithtimezone_from_naive_datetime():
date = datetime.datetime(2020, 2, 27, 14, 39, 19)
with pytest.raises(ValueError, match="datetime without timezone"):
TimestampWithTimezone.from_datetime(date)
def test_timestampwithtimezone_from_iso8601():
date = "2020-02-27 14:39:19.123456+0100"
tstz = TimestampWithTimezone.from_iso8601(date)
assert tstz == TimestampWithTimezone(
timestamp=Timestamp(
seconds=1582810759,
microseconds=123456,
),
offset_bytes=b"+0100",
)
def test_timestampwithtimezone_from_iso8601_negative_utc():
date = "2020-02-27 13:39:19-0000"
tstz = TimestampWithTimezone.from_iso8601(date)
assert tstz == TimestampWithTimezone(
timestamp=Timestamp(
seconds=1582810759,
microseconds=0,
),
offset_bytes=b"-0000",
)
@pytest.mark.parametrize("date", TS_DATETIMES)
@pytest.mark.parametrize("tz", TS_TIMEZONES)
@pytest.mark.parametrize("microsecond", [0, 1, 10, 100, 1000, 999999])
def test_timestampwithtimezone_to_datetime(date, tz, microsecond):
date = date.replace(tzinfo=tz, microsecond=microsecond)
tstz = TimestampWithTimezone.from_datetime(date)
assert tstz.to_datetime() == date
assert tstz.to_datetime().utcoffset() == date.utcoffset()
def test_timestampwithtimezone_to_datetime__tz_overflow():
ts = 1582810759
date = datetime.datetime.fromtimestamp(ts, datetime.timezone.utc)
tstz = TimestampWithTimezone(
timestamp=Timestamp(seconds=ts, microseconds=0), offset_bytes=b"+9959"
)
assert tstz.to_datetime() == date
assert tstz.to_datetime().utcoffset() == date.utcoffset()
assert int(tstz.to_datetime().timestamp()) == ts
def test_person_from_fullname():
"""The author should have name, email and fullname filled."""
actual_person = Person.from_fullname(b"tony <ynot@dagobah>")
assert actual_person == Person(
fullname=b"tony <ynot@dagobah>",
name=b"tony",
email=b"ynot@dagobah",
)
def test_person_from_fullname_no_email():
"""The author and fullname should be the same as the input (author)."""
actual_person = Person.from_fullname(b"tony")
assert actual_person == Person(
fullname=b"tony",
name=b"tony",
email=None,
)
def test_person_from_fullname_empty_person():
"""Empty person has only its fullname filled with the empty
byte-string.
"""
actual_person = Person.from_fullname(b"")
assert actual_person == Person(
fullname=b"",
name=None,
email=None,
)
def test_git_author_line_to_author():
# edge case out of the way
with pytest.raises(TypeError):
Person.from_fullname(None)
tests = {
b"a <b@c.com>": Person(
name=b"a",
email=b"b@c.com",
fullname=b"a <b@c.com>",
),
b"<foo@bar.com>": Person(
name=None,
email=b"foo@bar.com",
fullname=b"<foo@bar.com>",
),
b"malformed <email": Person(
name=b"malformed", email=b"email", fullname=b"malformed <email"
),
b'malformed <"<br"@ckets>': Person(
name=b"malformed",
email=b'"<br"@ckets',
fullname=b'malformed <"<br"@ckets>',
),
b"trailing <sp@c.e> ": Person(
name=b"trailing",
email=b"sp@c.e",
fullname=b"trailing <sp@c.e> ",
),
b"no<sp@c.e>": Person(
name=b"no",
email=b"sp@c.e",
fullname=b"no<sp@c.e>",
),
b" more <sp@c.es>": Person(
name=b"more",
email=b"sp@c.es",
fullname=b" more <sp@c.es>",
),
b" <>": Person(
name=None,
email=None,
fullname=b" <>",
),
}
for person in sorted(tests):
expected_person = tests[person]
assert expected_person == Person.from_fullname(person)
def test_person_comparison():
"""Check only the fullname attribute is used to compare Person objects"""
person = Person(fullname=b"p1", name=None, email=None)
assert attr.evolve(person, name=b"toto") == person
assert attr.evolve(person, email=b"toto@example.com") == person
person = Person(fullname=b"", name=b"toto", email=b"toto@example.com")
assert attr.evolve(person, fullname=b"dude") != person
# Content
def test_content_get_hash():
hashes = dict(sha1=b"foo", sha1_git=b"bar", sha256=b"baz", blake2s256=b"qux")
c = Content(length=42, status="visible", **hashes)
for hash_name, hash_ in hashes.items():
assert c.get_hash(hash_name) == hash_
def test_content_hashes():
hashes = dict(sha1=b"foo", sha1_git=b"bar", sha256=b"baz", blake2s256=b"qux")
c = Content(length=42, status="visible", **hashes)
assert c.hashes() == hashes
def test_content_data():
c = Content(
length=42,
status="visible",
data=b"foo",
sha1=b"foo",
sha1_git=b"bar",
sha256=b"baz",
blake2s256=b"qux",
)
assert c.with_data() == c
assert c.to_dict() == {
"sha1": b"foo",
"sha1_git": b"bar",
"sha256": b"baz",
"blake2s256": b"qux",
"length": 42,
"status": "visible",
"data": b"foo",
}
def test_content_data_missing():
c = Content(
length=42,
status="visible",
sha1=b"foo",
sha1_git=b"bar",
sha256=b"baz",
blake2s256=b"qux",
)
with pytest.raises(MissingData):
c.with_data()
assert c.to_dict() == {
"sha1": b"foo",
"sha1_git": b"bar",
"sha256": b"baz",
"blake2s256": b"qux",
"length": 42,
"status": "visible",
}
@given(strategies.present_contents_d())
def test_content_from_dict(content_d):
c = Content.from_data(**content_d)
assert c
assert c.ctime == content_d["ctime"]
content_d2 = c.to_dict()
c2 = Content.from_dict(content_d2)
assert c2.ctime == c.ctime
def test_content_from_dict_str_ctime():
# test with ctime as a string
n = datetime.datetime(2020, 5, 6, 12, 34, tzinfo=datetime.timezone.utc)
content_d = {
"ctime": n.isoformat(),
"data": b"",
"length": 0,
"sha1": b"\x00",
"sha256": b"\x00",
"sha1_git": b"\x00",
"blake2s256": b"\x00",
}
c = Content.from_dict(content_d)
assert c.ctime == n
def test_content_from_dict_str_naive_ctime():
# test with ctime as a string
n = datetime.datetime(2020, 5, 6, 12, 34)
content_d = {
"ctime": n.isoformat(),
"data": b"",
"length": 0,
"sha1": b"\x00",
"sha256": b"\x00",
"sha1_git": b"\x00",
"blake2s256": b"\x00",
}
with pytest.raises(ValueError, match="must be a timezone-aware datetime."):
Content.from_dict(content_d)
@given(binary(max_size=4096))
def test_content_from_data(data):
c = Content.from_data(data)
assert c.data == data
assert c.length == len(data)
assert c.status == "visible"
for key, value in MultiHash.from_data(data).digest().items():
assert getattr(c, key) == value
@given(binary(max_size=4096))
def test_hidden_content_from_data(data):
c = Content.from_data(data, status="hidden")
assert c.data == data
assert c.length == len(data)
assert c.status == "hidden"
for key, value in MultiHash.from_data(data).digest().items():
assert getattr(c, key) == value
def test_content_naive_datetime():
c = Content.from_data(b"foo")
with pytest.raises(ValueError, match="must be a timezone-aware datetime"):
Content(
**c.to_dict(),
ctime=datetime.datetime.now(),
)
@given(strategies.present_contents())
def test_content_git_roundtrip(content):
assert content.data is not None
raw = swh.model.git_objects.content_git_object(content)
sha1_git = hashlib.new("sha1", raw).digest()
assert content.sha1_git == sha1_git
@given(strategies.present_contents())
def test_content_evolve(content):
content.check()
assert attr.evolve(content, sha1=b"\x00" * 20) == content.evolve(sha1=b"\x00" * 20)
assert attr.evolve(content, data=b"foo") == content.evolve(data=b"foo")
assert attr.evolve(content, data=None) == content.evolve(data=None)
# SkippedContent
@given(binary(max_size=4096))
def test_skipped_content_from_data(data):
c = SkippedContent.from_data(data, reason="reason")
assert c.reason == "reason"
assert c.length == len(data)
assert c.status == "absent"
for key, value in MultiHash.from_data(data).digest().items():
assert getattr(c, key) == value
@given(strategies.skipped_contents_d())
def test_skipped_content_origin_is_str(skipped_content_d):
assert SkippedContent.from_dict(skipped_content_d)
skipped_content_d["origin"] = "http://path/to/origin"
assert SkippedContent.from_dict(skipped_content_d)
skipped_content_d["origin"] = Origin(url="http://path/to/origin")
with pytest.raises(ValueError, match="origin"):
SkippedContent.from_dict(skipped_content_d)
def test_skipped_content_naive_datetime():
c = SkippedContent.from_data(b"foo", reason="reason")
with pytest.raises(ValueError, match="must be a timezone-aware datetime"):
SkippedContent(
**c.to_dict(),
ctime=datetime.datetime.now(),
)
def test_skipped_content_swhid():
skipped_content = SkippedContent.from_data(b"foo", reason="reason")
assert skipped_content.swhid() == CoreSWHID.from_string(
"swh:1:cnt:19102815663d23f8b75a47e7a01965dcdc96468c"
)
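# Illustrative note (assumption about the underlying hashing, not asserted
# here): the cnt SWHID above is derived from the sha1_git digest of b"foo",
# i.e. presumably sha1 over the git blob serialization b"blob 3\x00foo",
# giving 19102815663d23f8b75a47e7a01965dcdc96468c.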
@given(strategies.skipped_contents())
def test_skipped_content_evolve(content):
content.check()
assert attr.evolve(content, sha1=b"\x00" * 20) == content.evolve(sha1=b"\x00" * 20)
assert attr.evolve(content, sha1=None) == content.evolve(sha1=None)
# Directory
@given(strategies.directories(raw_manifest=none()))
def test_directory_check(directory):
directory.check()
directory2 = attr.evolve(directory, id=b"\x00" * 20)
with pytest.raises(ValueError, match="does not match recomputed hash"):
directory2.check()
directory2 = attr.evolve(
directory, raw_manifest=swh.model.git_objects.directory_git_object(directory)
)
with pytest.raises(
ValueError, match="non-none raw_manifest attribute, but does not need it."
):
directory2.check()
@given(strategies.directories(raw_manifest=none()))
def test_directory_raw_manifest(directory):
assert "raw_manifest" not in directory.to_dict()
raw_manifest = b"foo"
id_ = hashlib.new("sha1", raw_manifest).digest()
# Forgot to update the id -> error
directory2 = attr.evolve(directory, raw_manifest=raw_manifest)
assert directory2.to_dict()["raw_manifest"] == raw_manifest
with pytest.raises(ValueError, match="does not match recomputed hash"):
directory2.check()
# id set to the right value -> ok
directory2 = attr.evolve(directory, raw_manifest=raw_manifest, id=id_)
assert directory2.id is not None
assert directory2.id == id_ != directory.id
assert directory2.to_dict()["raw_manifest"] == raw_manifest
directory2.check()
# id implicitly set to the right value -> ok
directory3 = directory.evolve(raw_manifest=raw_manifest)
assert directory3.id is not None
assert directory3.id == id_ != directory.id
assert directory3.to_dict()["raw_manifest"] == raw_manifest
directory3.check()
@given(strategies.directories(raw_manifest=none()))
def test_directory_evolve(directory):
directory.check()
# Add an entry (while making sure it is not a duplicate)
longest_entry_name = max(
(entry.name for entry in directory.entries), key=len, default=b""
)
entries = (
*directory.entries,
DirectoryEntry(
name=longest_entry_name + b"x",
type="file",
target=b"\x00" * 20,
perms=0,
),
)
directory2 = directory.evolve(entries=entries)
assert directory2.entries == entries
assert directory2.id != directory.id, "directory.evolve() did not update the id"
directory2.check()
with pytest.raises(TypeError, match="use attr.evolve"):
directory.evolve(id=b"\x00" * 20)
with pytest.raises(TypeError, match="unexpected keyword argument"):
directory.evolve(foo=b"")
@given(strategies.directories(raw_manifest=none()))
def test_directory_evolve_raw_manifest(directory):
directory2 = directory.evolve(raw_manifest=b"123")
assert directory2 == attr.evolve(directory, id=directory2.id, raw_manifest=b"123")
directory3 = directory2.evolve(entries=())
assert directory3.raw_manifest == directory2.raw_manifest
assert (
directory3.id == directory2.id
), ".evolve() change the id despite raw_manifest being set"
assert directory3 == attr.evolve(
directory, id=directory2.id, entries=(), raw_manifest=b"123"
)
def test_directory_entry_name_validation():
with pytest.raises(ValueError, match="valid directory entry name."):
DirectoryEntry(name=b"foo/", type="dir", target=b"\x00" * 20, perms=0),
def test_directory_duplicate_entry_name():
entries = (
DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
DirectoryEntry(name=b"foo", type="dir", target=b"\x01" * 20, perms=1),
)
with pytest.raises(ValueError, match="duplicated entry name"):
Directory(entries=entries)
entries = (
DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
)
with pytest.raises(ValueError, match="duplicated entry name"):
Directory(entries=entries)
@given(strategies.directories())
def test_directory_from_possibly_duplicated_entries__no_duplicates(directory):
"""
Directory.from_possibly_duplicated_entries should return the directory
unchanged if it has no duplicated entry name.
"""
assert (False, directory) == Directory.from_possibly_duplicated_entries(
id=directory.id, entries=directory.entries, raw_manifest=directory.raw_manifest
)
assert (False, directory) == Directory.from_possibly_duplicated_entries(
entries=directory.entries, raw_manifest=directory.raw_manifest
)
@pytest.mark.parametrize("rev_first", [True, False])
def test_directory_from_possibly_duplicated_entries__rev_and_dir(rev_first):
entries = (
DirectoryEntry(name=b"foo", type="dir", target=b"\x01" * 20, perms=1),
DirectoryEntry(name=b"foo", type="rev", target=b"\x00" * 20, perms=0),
)
if rev_first:
entries = tuple(reversed(entries))
(is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(entries=entries)
assert is_corrupt
assert dir_.entries == (
DirectoryEntry(name=b"foo", type="rev", target=b"\x00" * 20, perms=0),
DirectoryEntry(
name=b"foo_0101010101", type="dir", target=b"\x01" * 20, perms=1
),
)
# order is independent of 'rev_first' because it is always sorted in git order
assert dir_.raw_manifest == (
# fmt: off
b"tree 52\x00"
+ b"0 foo\x00" + b"\x00" * 20
+ b"1 foo\x00" + b"\x01" * 20
# fmt: on
)
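# Worked check of the manifest length above (illustrative): each entry is
# serialized as the perms digit, a space, the name b"foo", a NUL byte and a
# 20-byte target, i.e. 1 + 1 + 3 + 1 + 20 = 26 bytes, so the two entries
# account for the 52 announced in the b"tree 52\x00" header.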
@pytest.mark.parametrize("file_first", [True, False])
def test_directory_from_possibly_duplicated_entries__file_and_dir(file_first):
entries = (
DirectoryEntry(name=b"foo", type="dir", target=b"\x01" * 20, perms=1),
DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
)
if file_first:
entries = tuple(reversed(entries))
(is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(entries=entries)
assert is_corrupt
assert dir_.entries == (
DirectoryEntry(name=b"foo", type="dir", target=b"\x01" * 20, perms=1),
DirectoryEntry(
name=b"foo_0000000000", type="file", target=b"\x00" * 20, perms=0
),
)
# order is independent of 'file_first' because it is always sorted in git order
assert dir_.raw_manifest == (
# fmt: off
b"tree 52\x00"
+ b"0 foo\x00" + b"\x00" * 20
+ b"1 foo\x00" + b"\x01" * 20
# fmt: on
)
def test_directory_from_possibly_duplicated_entries__two_files1():
entries = (
DirectoryEntry(name=b"foo", type="file", target=b"\x01" * 20, perms=1),
DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
)
(is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(entries=entries)
assert is_corrupt
assert dir_.entries == (
DirectoryEntry(name=b"foo", type="file", target=b"\x01" * 20, perms=1),
DirectoryEntry(
name=b"foo_0000000000", type="file", target=b"\x00" * 20, perms=0
),
)
assert dir_.raw_manifest == (
# fmt: off
b"tree 52\x00"
+ b"1 foo\x00" + b"\x01" * 20
+ b"0 foo\x00" + b"\x00" * 20
# fmt: on
)
def test_directory_from_possibly_duplicated_entries__two_files2():
"""
Same as above, but entries are in a different order (and order matters
to break the tie)
"""
entries = (
DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
DirectoryEntry(name=b"foo", type="file", target=b"\x01" * 20, perms=1),
)
(is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(entries=entries)
assert is_corrupt
assert dir_.entries == (
DirectoryEntry(name=b"foo", type="file", target=b"\x00" * 20, perms=0),
DirectoryEntry(
name=b"foo_0101010101", type="file", target=b"\x01" * 20, perms=1
),
)
assert dir_.raw_manifest == (
# fmt: off
b"tree 52\x00"
+ b"0 foo\x00" + b"\x00" * 20
+ b"1 foo\x00" + b"\x01" * 20
# fmt: on
)
def test_directory_from_possibly_duplicated_entries__preserve_manifest():
entries = (
DirectoryEntry(name=b"foo", type="dir", target=b"\x01" * 20, perms=1),
DirectoryEntry(name=b"foo", type="rev", target=b"\x00" * 20, perms=0),
)
(is_corrupt, dir_) = Directory.from_possibly_duplicated_entries(
entries=entries, raw_manifest=b"blah"
)
assert is_corrupt
assert dir_.entries == (
DirectoryEntry(name=b"foo", type="rev", target=b"\x00" * 20, perms=0),
DirectoryEntry(
name=b"foo_0101010101", type="dir", target=b"\x01" * 20, perms=1
),
)
assert dir_.raw_manifest == b"blah"
@pytest.fixture
def directory_with_every_possible_type():
return Directory.from_dict(
{
"entries": [
{
"type": "file",
"perms": 33188,
"name": b"README",
"target": hash_to_bytes("37ec8ea2110c0b7a32fbb0e872f6e7debbf95e21"),
},
{
"type": "dir",
"perms": 16384,
"name": b"src",
"target": hash_to_bytes("61e6e867f5d7ba3b40540869bc050b0c4fed9e95"),
},
{
"type": "rev",
"perms": 57344,
"name": b"submodule",
"target": hash_to_bytes("3d531e169db92a16a9a8974f0ae6edf52e52659e"),
},
],
}
)
def test_directory_entry_swhids(directory_with_every_possible_type):
assert [entry.swhid() for entry in directory_with_every_possible_type.entries] == [
CoreSWHID.from_string("swh:1:cnt:37ec8ea2110c0b7a32fbb0e872f6e7debbf95e21"),
CoreSWHID.from_string("swh:1:dir:61e6e867f5d7ba3b40540869bc050b0c4fed9e95"),
CoreSWHID.from_string("swh:1:rev:3d531e169db92a16a9a8974f0ae6edf52e52659e"),
]
# Release
@given(strategies.releases(raw_manifest=none()))
def test_release_check(release):
release.check()
release2 = attr.evolve(release, id=b"\x00" * 20)
with pytest.raises(ValueError, match="does not match recomputed hash"):
release2.check()
release2 = attr.evolve(
release, raw_manifest=swh.model.git_objects.release_git_object(release)
)
with pytest.raises(
ValueError, match="non-none raw_manifest attribute, but does not need it."
):
release2.check()
@given(strategies.releases(raw_manifest=none()))
def test_release_raw_manifest(release):
raw_manifest = b"foo"
id_ = hashlib.new("sha1", raw_manifest).digest()
release2 = attr.evolve(release, raw_manifest=raw_manifest)
assert release2.to_dict()["raw_manifest"] == raw_manifest
with pytest.raises(ValueError, match="does not match recomputed hash"):
release2.check()
release2 = attr.evolve(release, raw_manifest=raw_manifest, id=id_)
assert release2.id is not None
assert release2.id == id_ != release.id
assert release2.to_dict()["raw_manifest"] == raw_manifest
release2.check()
def test_release_target_swhid():
release = Release.from_dict(release_example)
assert release.target_swhid() == CoreSWHID.from_string(
"swh:1:rev:741b2252a5e14d6c60a913c77a6099abe73a854a"
)
@given(strategies.releases(raw_manifest=none()))
def test_release_evolve(release):
release.check()
message = (release.message or b"abc") + b"\n"
release2 = release.evolve(message=message)
assert release2.message == message
assert release2.id != release.id, "release.evolve() did not update the id"
release2.check()
release2 = release.evolve(message=None)
assert release2.message is None
if release.message is None:
assert release2.id == release.id, "no-op release.evolve() updated the id"
else:
assert release2.id != release.id, "release.evolve() did not update the id"
release2.check()
with pytest.raises(TypeError, match="use attr.evolve"):
release.evolve(id=b"\x00" * 20)
with pytest.raises(TypeError, match="unexpected keyword argument"):
release.evolve(foo=b"")
@given(strategies.releases(raw_manifest=none()))
def test_release_evolve_raw_manifest(release):
release2 = release.evolve(raw_manifest=b"123")
assert release2 == attr.evolve(release, id=release2.id, raw_manifest=b"123")
release3 = release2.evolve(message=None)
assert release3.raw_manifest == release2.raw_manifest
assert (
release3.id == release2.id
), ".evolve() change the id despite raw_manifest being set"
assert release3 == attr.evolve(
release, id=release2.id, message=None, raw_manifest=b"123"
)
# Revision
@given(strategies.revisions(raw_manifest=none()))
def test_revision_check(revision):
revision.check()
revision2 = attr.evolve(revision, id=b"\x00" * 20)
with pytest.raises(ValueError, match="does not match recomputed hash"):
revision2.check()
revision2 = attr.evolve(
revision, raw_manifest=swh.model.git_objects.revision_git_object(revision)
)
with pytest.raises(
ValueError, match="non-none raw_manifest attribute, but does not need it."
):
revision2.check()
@given(strategies.revisions(raw_manifest=none()))
def test_revision_raw_manifest(revision):
raw_manifest = b"foo"
id_ = hashlib.new("sha1", raw_manifest).digest()
revision2 = attr.evolve(revision, raw_manifest=raw_manifest)
assert revision2.to_dict()["raw_manifest"] == raw_manifest
with pytest.raises(ValueError, match="does not match recomputed hash"):
revision2.check()
revision2 = attr.evolve(revision, raw_manifest=raw_manifest, id=id_)
assert revision2.id is not None
assert revision2.id == id_ != revision.id
assert revision2.to_dict()["raw_manifest"] == raw_manifest
revision2.check()
def test_revision_extra_headers_no_headers():
rev_dict = revision_example.copy()
rev_dict.pop("id")
rev = Revision.from_dict(rev_dict)
rev_dict = attr.asdict(rev, recurse=False)
rev_model = Revision(**rev_dict)
assert rev_model.metadata is None
assert rev_model.extra_headers == ()
rev_dict["metadata"] = {
"something": "somewhere",
"some other thing": "stranger",
}
rev_model = Revision(**rev_dict)
assert rev_model.metadata == rev_dict["metadata"]
assert rev_model.extra_headers == ()
def test_revision_extra_headers_with_headers():
rev_dict = revision_example.copy()
rev_dict.pop("id")
rev = Revision.from_dict(rev_dict)
rev_dict = attr.asdict(rev, recurse=False)
rev_dict["metadata"] = {
"something": "somewhere",
"some other thing": "stranger",
}
extra_headers = (
(b"header1", b"value1"),
(b"header2", b"42"),
(b"header3", b"should I?\x00"),
(b"header1", b"again"),
)
rev_dict["extra_headers"] = extra_headers
rev_model = Revision(**rev_dict)
assert "extra_headers" not in rev_model.metadata
assert rev_model.extra_headers == extra_headers
def test_revision_extra_headers_in_metadata():
rev_dict = revision_example.copy()
rev_dict.pop("id")
rev = Revision.from_dict(rev_dict)
rev_dict = attr.asdict(rev, recurse=False)
rev_dict["metadata"] = {
"something": "somewhere",
"some other thing": "stranger",
}
extra_headers = (
(b"header1", b"value1"),
(b"header2", b"42"),
(b"header3", b"should I?\x00"),
(b"header1", b"again"),
)
# check the bw-compat init hook does the job
# i.e. extra_headers are given in the metadata field
rev_dict["metadata"]["extra_headers"] = extra_headers
rev_model = Revision(**rev_dict)
assert "extra_headers" not in rev_model.metadata
assert rev_model.extra_headers == extra_headers
def test_revision_extra_headers_as_lists():
rev_dict = revision_example.copy()
rev_dict.pop("id")
rev = Revision.from_dict(rev_dict)
rev_dict = attr.asdict(rev, recurse=False)
rev_dict["metadata"] = {}
extra_headers = (
(b"header1", b"value1"),
(b"header2", b"42"),
(b"header3", b"should I?\x00"),
(b"header1", b"again"),
)
# check Revision.extra_headers tuplify does the job
rev_dict["extra_headers"] = [list(x) for x in extra_headers]
rev_model = Revision(**rev_dict)
assert "extra_headers" not in rev_model.metadata
assert rev_model.extra_headers == extra_headers
def test_revision_extra_headers_type_error():
rev_dict = revision_example.copy()
rev_dict.pop("id")
rev = Revision.from_dict(rev_dict)
orig_rev_dict = attr.asdict(rev, recurse=False)
orig_rev_dict["metadata"] = {
"something": "somewhere",
"some other thing": "stranger",
}
extra_headers = (
("header1", b"value1"),
(b"header2", 42),
("header1", "again"),
)
# check headers one at a time
# if given as extra_header
for extra_header in extra_headers:
rev_dict = copy.deepcopy(orig_rev_dict)
rev_dict["extra_headers"] = (extra_header,)
with pytest.raises(AttributeTypeError):
Revision(**rev_dict)
# if given as metadata
for extra_header in extra_headers:
rev_dict = copy.deepcopy(orig_rev_dict)
rev_dict["metadata"]["extra_headers"] = (extra_header,)
with pytest.raises(AttributeTypeError):
Revision(**rev_dict)
def test_revision_extra_headers_from_dict():
rev_dict = revision_example.copy()
rev_dict.pop("id")
rev_model = Revision.from_dict(rev_dict)
assert rev_model.metadata is None
assert rev_model.extra_headers == ()
rev_dict["metadata"] = {
"something": "somewhere",
"some other thing": "stranger",
}
rev_model = Revision.from_dict(rev_dict)
assert rev_model.metadata == rev_dict["metadata"]
assert rev_model.extra_headers == ()
extra_headers = (
(b"header1", b"value1"),
(b"header2", b"42"),
(b"header3", b"should I?\nmaybe\x00\xff"),
(b"header1", b"again"),
)
rev_dict["extra_headers"] = extra_headers
rev_model = Revision.from_dict(rev_dict)
assert "extra_headers" not in rev_model.metadata
assert rev_model.extra_headers == extra_headers
def test_revision_extra_headers_in_metadata_from_dict():
rev_dict = revision_example.copy()
rev_dict.pop("id")
rev_dict["metadata"] = {
"something": "somewhere",
"some other thing": "stranger",
}
extra_headers = (
(b"header1", b"value1"),
(b"header2", b"42"),
(b"header3", b"should I?\nmaybe\x00\xff"),
(b"header1", b"again"),
)
# check the bw-compat init hook does the job
rev_dict["metadata"]["extra_headers"] = extra_headers
rev_model = Revision.from_dict(rev_dict)
assert "extra_headers" not in rev_model.metadata
assert rev_model.extra_headers == extra_headers
def test_revision_extra_headers_as_lists_from_dict():
rev_dict = revision_example.copy()
rev_dict.pop("id")
rev_model = Revision.from_dict(rev_dict)
rev_dict["metadata"] = {
"something": "somewhere",
"some other thing": "stranger",
}
extra_headers = (
(b"header1", b"value1"),
(b"header2", b"42"),
(b"header3", b"should I?\nmaybe\x00\xff"),
(b"header1", b"again"),
)
# check Revision.extra_headers converter does the job
rev_dict["extra_headers"] = [list(x) for x in extra_headers]
rev_model = Revision.from_dict(rev_dict)
assert "extra_headers" not in rev_model.metadata
assert rev_model.extra_headers == extra_headers
def test_revision_no_author_or_committer_from_dict():
rev_dict = revision_example.copy()
rev_dict["author"] = rev_dict["date"] = None
rev_dict["committer"] = rev_dict["committer_date"] = None
rev_model = Revision.from_dict(rev_dict)
assert rev_model.to_dict() == {
**rev_dict,
"parents": tuple(rev_dict["parents"]),
"extra_headers": (),
"metadata": None,
}
def test_revision_none_author_or_committer():
rev_dict = revision_example.copy()
rev_dict["author"] = None
with pytest.raises(ValueError, match=".*date must be None if author is None.*"):
Revision.from_dict(rev_dict)
rev_dict = revision_example.copy()
rev_dict["committer"] = None
with pytest.raises(
ValueError, match=".*committer_date must be None if committer is None.*"
):
Revision.from_dict(rev_dict)
def test_revision_directory_swhid():
revision = Revision.from_dict(revision_example)
assert revision.directory_swhid() == CoreSWHID.from_string(
"swh:1:dir:85a74718d377195e1efd0843ba4f3260bad4fe07"
)
def test_revision_parent_swhids():
revision_d = copy.deepcopy(revision_example)
revision_d["parents"].append(
hash_to_bytes("b2a7e1260492e344fab3cbf91bc13c91e05426fd")
)
revision = Revision.from_dict(revision_d)
assert revision.parent_swhids() == [
CoreSWHID.from_string("swh:1:rev:01e2d0627a9a6edb24c37db45db5ecb31e9de808"),
CoreSWHID.from_string("swh:1:rev:b2a7e1260492e344fab3cbf91bc13c91e05426fd"),
]
@pytest.fixture
def snapshot_with_all_types():
return Snapshot.from_dict(snapshot_example)
def test_snapshot_branch_swhids(snapshot_with_all_types):
assert {
name: branch and branch.swhid()
for (name, branch) in snapshot_with_all_types.branches.items()
} == {
b"directory": CoreSWHID.from_string(
"swh:1:dir:1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8"
),
b"content": CoreSWHID.from_string(
"swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1"
),
b"alias": None,
b"revision": CoreSWHID.from_string(
"swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6"
),
b"release": CoreSWHID.from_string(
"swh:1:rel:7045404f3d1c54e6473c71bbb716529fbad4be24"
),
b"snapshot": CoreSWHID.from_string(
"swh:1:snp:1a8893e6a86f444e8be8e7bda6cb34fb1735a00e"
),
b"dangling": None,
}
@given(strategies.snapshots())
def test_snapshot_evolve(snapshot):
snapshot.check()
# Add an entry (while making sure it is not a duplicate)
longest_branch_name = max(snapshot.branches, key=len, default=b"")
branches = {
**snapshot.branches,
longest_branch_name
+ b"x": SnapshotBranch(
target_type=SnapshotTargetType.RELEASE,
target=b"\x00" * 20,
),
}
snapshot2 = snapshot.evolve(branches=branches)
assert snapshot2.branches == branches
assert snapshot2.id != snapshot.id, "snapshot.evolve() did not update the id"
snapshot2.check()
with pytest.raises(TypeError, match="use attr.evolve"):
snapshot.evolve(id=b"\x00" * 20)
with pytest.raises(TypeError, match="unexpected keyword argument"):
snapshot.evolve(foo=b"")
@given(strategies.revisions(raw_manifest=none()))
def test_revision_evolve(revision):
revision.check()
message = (revision.message or b"abc") + b"\n"
revision2 = revision.evolve(message=message)
assert revision2.message == message
assert revision2.id != revision.id, "revision.evolve() did not update the id"
revision2.check()
revision2 = revision.evolve(message=None)
assert revision2.message is None
if revision.message is None:
assert revision2.id == revision.id, "no-op revision.evolve() updated the id"
else:
assert revision2.id != revision.id, "revision.evolve() did not update the id"
revision2.check()
with pytest.raises(TypeError, match="use attr.evolve"):
revision.evolve(id=b"\x00" * 20)
with pytest.raises(TypeError, match="unexpected keyword argument"):
revision.evolve(foo=b"")
@given(strategies.revisions(raw_manifest=none()))
def test_revision_evolve_raw_manifest(revision):
revision2 = revision.evolve(raw_manifest=b"123")
assert revision2 == attr.evolve(revision, id=revision2.id, raw_manifest=b"123")
revision3 = revision2.evolve(message=None)
assert revision3.raw_manifest == revision2.raw_manifest
assert (
revision3.id == revision2.id
), ".evolve() change the id despite raw_manifest being set"
assert revision3 == attr.evolve(
revision, id=revision2.id, message=None, raw_manifest=b"123"
)
@given(strategies.objects(split_content=True))
def test_object_type(objtype_and_obj):
obj_type, obj = objtype_and_obj
assert obj_type == obj.object_type
def test_object_type_is_final():
checked_classes = set()
object_types = set()
def check_final(cls):
if cls in checked_classes:
return
checked_classes.add(cls)
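        # `object_type` may be declared as an abstract method on intermediate base
        # classes; treat it as missing so that only concrete model classes are
        # required to be final below.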
obj_type = sentinel = object()
obj_type = getattr(cls, "object_type", sentinel)
if getattr(obj_type, "__isabstractmethod__", False):
obj_type = sentinel
if obj_type is sentinel:
assert cls.__subclasses__()
else:
assert not cls.__subclasses__()
assert cls.object_type not in object_types
object_types.add(cls.object_type)
for subcls in cls.__subclasses__():
check_final(subcls)
check_final(BaseModel)
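# Shared fixtures for the RawExtrinsicMetadata tests below: a dummy authority and
# fetcher, example targets, and the constructor fields common to every test.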
_metadata_authority = MetadataAuthority(
type=MetadataAuthorityType.FORGE,
url="https://forge.softwareheritage.org",
)
_metadata_fetcher = MetadataFetcher(
name="test-fetcher",
version="0.0.1",
)
_content_swhid = ExtendedSWHID.from_string(
"swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"
)
_origin_url = "https://forge.softwareheritage.org/source/swh-model.git"
_origin_swhid = ExtendedSWHID.from_string(
"swh:1:ori:433b4f5612f0720ed51fa7aeaf43a3625870057b"
)
_dummy_qualifiers = {"origin": "https://example.com", "lines": "42"}
_common_metadata_fields = dict(
discovery_date=datetime.datetime(
2021, 1, 29, 13, 57, 9, tzinfo=datetime.timezone.utc
),
authority=_metadata_authority,
fetcher=_metadata_fetcher,
format="json",
metadata=b'{"origin": "https://example.com", "lines": "42"}',
)
def test_metadata_valid():
"""Checks valid RawExtrinsicMetadata objects don't raise an error."""
# Simplest case
RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields)
# Object with an SWHID
RawExtrinsicMetadata(
target=_content_swhid,
**_common_metadata_fields,
)
def test_metadata_from_old_dict():
common_fields = {
"authority": {"type": "forge", "url": "https://forge.softwareheritage.org"},
"fetcher": {
"name": "test-fetcher",
"version": "0.0.1",
},
"discovery_date": _common_metadata_fields["discovery_date"],
"format": "json",
"metadata": b'{"origin": "https://example.com", "lines": "42"}',
}
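    # The legacy dict format uses a "type" key and, for origins, the bare URL as
    # the target; from_dict() must still parse it into the same object.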
m = RawExtrinsicMetadata(
target=_origin_swhid,
**_common_metadata_fields,
)
assert (
RawExtrinsicMetadata.from_dict(
{"id": m.id, "target": _origin_url, "type": "origin", **common_fields}
)
== m
)
m = RawExtrinsicMetadata(
target=_content_swhid,
**_common_metadata_fields,
)
assert (
RawExtrinsicMetadata.from_dict(
{"target": str(_content_swhid), "type": "content", **common_fields}
)
== m
)
def test_metadata_to_dict():
"""Checks valid RawExtrinsicMetadata objects don't raise an error."""
common_fields = {
"authority": {"type": "forge", "url": "https://forge.softwareheritage.org"},
"fetcher": {
"name": "test-fetcher",
"version": "0.0.1",
},
"discovery_date": _common_metadata_fields["discovery_date"],
"format": "json",
"metadata": b'{"origin": "https://example.com", "lines": "42"}',
}
m = RawExtrinsicMetadata(
target=_origin_swhid,
**_common_metadata_fields,
)
assert m.to_dict() == {
"target": str(_origin_swhid),
"id": b"\xa3)q\x0f\xf7p\xc7\xb0\\O\xe8\x84\x83Z\xb0]\x81\xe9\x95\x13",
**common_fields,
}
assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
m = RawExtrinsicMetadata(
target=_content_swhid,
**_common_metadata_fields,
)
assert m.to_dict() == {
"target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
"id": b"\xbc\xa3U\xddf\x19U\xc5\xd2\xd7\xdfK\xd7c\x1f\xa8\xfeh\x992",
**common_fields,
}
assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
hash_hex = "6162" * 10
hash_bin = b"ab" * 10
m = RawExtrinsicMetadata(
target=_content_swhid,
**_common_metadata_fields,
origin="https://example.org/",
snapshot=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=hash_bin),
release=CoreSWHID(object_type=ObjectType.RELEASE, object_id=hash_bin),
revision=CoreSWHID(object_type=ObjectType.REVISION, object_id=hash_bin),
path=b"/foo/bar",
directory=CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=hash_bin),
)
assert m.to_dict() == {
"target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
"id": b"\x14l\xb0\x1f\xb9\xc0{)\xc7\x0f\xbd\xc0*,YZ\xf5C\xab\xfc",
**common_fields,
"origin": "https://example.org/",
"snapshot": f"swh:1:snp:{hash_hex}",
"release": f"swh:1:rel:{hash_hex}",
"revision": f"swh:1:rev:{hash_hex}",
"path": b"/foo/bar",
"directory": f"swh:1:dir:{hash_hex}",
}
assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
def test_metadata_invalid_target():
"""Checks various invalid values for the 'target' field."""
# SWHID passed as string instead of SWHID
with pytest.raises(ValueError, match="target must be.*ExtendedSWHID"):
RawExtrinsicMetadata(
target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
**_common_metadata_fields,
)
def test_metadata_naive_datetime():
with pytest.raises(ValueError, match="must be a timezone-aware datetime"):
RawExtrinsicMetadata(
target=_origin_swhid,
**{**_common_metadata_fields, "discovery_date": datetime.datetime.now()},
)
def test_metadata_validate_context_origin():
"""Checks validation of RawExtrinsicMetadata.origin."""
# Origins can't have an 'origin' context
with pytest.raises(
ValueError, match="Unexpected 'origin' context for origin object"
):
RawExtrinsicMetadata(
target=_origin_swhid,
origin=_origin_url,
**_common_metadata_fields,
)
# but all other types can
RawExtrinsicMetadata(
target=_content_swhid,
origin=_origin_url,
**_common_metadata_fields,
)
# SWHIDs aren't valid origin URLs
with pytest.raises(ValueError, match="SWHID used as context origin URL"):
RawExtrinsicMetadata(
target=_content_swhid,
origin="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
**_common_metadata_fields,
)
def test_metadata_validate_context_visit():
"""Checks validation of RawExtrinsicMetadata.visit."""
# Origins can't have a 'visit' context
with pytest.raises(
ValueError, match="Unexpected 'visit' context for origin object"
):
RawExtrinsicMetadata(
target=_origin_swhid,
visit=42,
**_common_metadata_fields,
)
# but all other types can
RawExtrinsicMetadata(
target=_content_swhid,
origin=_origin_url,
visit=42,
**_common_metadata_fields,
)
# Missing 'origin'
with pytest.raises(ValueError, match="'origin' context must be set if 'visit' is"):
RawExtrinsicMetadata(
target=_content_swhid,
visit=42,
**_common_metadata_fields,
)
# visit id must be positive
with pytest.raises(ValueError, match="Nonpositive visit id"):
RawExtrinsicMetadata(
target=_content_swhid,
origin=_origin_url,
visit=-42,
**_common_metadata_fields,
)
def test_metadata_validate_context_snapshot():
"""Checks validation of RawExtrinsicMetadata.snapshot."""
# Origins can't have a 'snapshot' context
with pytest.raises(
ValueError, match="Unexpected 'snapshot' context for origin object"
):
RawExtrinsicMetadata(
target=_origin_swhid,
snapshot=CoreSWHID(
object_type=ObjectType.SNAPSHOT,
object_id=EXAMPLE_HASH,
),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
target=_content_swhid,
snapshot=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=EXAMPLE_HASH),
**_common_metadata_fields,
)
# SWHID type doesn't match the expected type of this context key
with pytest.raises(
ValueError, match="Expected SWHID type 'snapshot', got 'content'"
):
RawExtrinsicMetadata(
target=_content_swhid,
snapshot=CoreSWHID(
object_type=ObjectType.CONTENT,
object_id=EXAMPLE_HASH,
),
**_common_metadata_fields,
)
def test_metadata_validate_context_release():
"""Checks validation of RawExtrinsicMetadata.release."""
# Origins can't have a 'release' context
with pytest.raises(
ValueError, match="Unexpected 'release' context for origin object"
):
RawExtrinsicMetadata(
target=_origin_swhid,
release=CoreSWHID(
object_type=ObjectType.RELEASE,
object_id=EXAMPLE_HASH,
),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
target=_content_swhid,
release=CoreSWHID(object_type=ObjectType.RELEASE, object_id=EXAMPLE_HASH),
**_common_metadata_fields,
)
# SWHID type doesn't match the expected type of this context key
with pytest.raises(
ValueError, match="Expected SWHID type 'release', got 'content'"
):
RawExtrinsicMetadata(
target=_content_swhid,
release=CoreSWHID(
object_type=ObjectType.CONTENT,
object_id=EXAMPLE_HASH,
),
**_common_metadata_fields,
)
def test_metadata_validate_context_revision():
"""Checks validation of RawExtrinsicMetadata.revision."""
# Origins can't have a 'revision' context
with pytest.raises(
ValueError, match="Unexpected 'revision' context for origin object"
):
RawExtrinsicMetadata(
target=_origin_swhid,
revision=CoreSWHID(
object_type=ObjectType.REVISION,
object_id=EXAMPLE_HASH,
),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
target=_content_swhid,
revision=CoreSWHID(object_type=ObjectType.REVISION, object_id=EXAMPLE_HASH),
**_common_metadata_fields,
)
# SWHID type doesn't match the expected type of this context key
with pytest.raises(
ValueError, match="Expected SWHID type 'revision', got 'content'"
):
RawExtrinsicMetadata(
target=_content_swhid,
revision=CoreSWHID(
object_type=ObjectType.CONTENT,
object_id=EXAMPLE_HASH,
),
**_common_metadata_fields,
)
def test_metadata_validate_context_path():
"""Checks validation of RawExtrinsicMetadata.path."""
# Origins can't have a 'path' context
with pytest.raises(ValueError, match="Unexpected 'path' context for origin object"):
RawExtrinsicMetadata(
target=_origin_swhid,
path=b"/foo/bar",
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
target=_content_swhid,
path=b"/foo/bar",
**_common_metadata_fields,
)
def test_metadata_validate_context_directory():
"""Checks validation of RawExtrinsicMetadata.directory."""
# Origins can't have a 'directory' context
with pytest.raises(
ValueError, match="Unexpected 'directory' context for origin object"
):
RawExtrinsicMetadata(
target=_origin_swhid,
directory=CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=EXAMPLE_HASH,
),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
target=_content_swhid,
directory=CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=EXAMPLE_HASH,
),
**_common_metadata_fields,
)
# SWHID type doesn't match the expected type of this context key
with pytest.raises(
ValueError, match="Expected SWHID type 'directory', got 'content'"
):
RawExtrinsicMetadata(
target=_content_swhid,
directory=CoreSWHID(
object_type=ObjectType.CONTENT,
object_id=EXAMPLE_HASH,
),
**_common_metadata_fields,
)
def test_metadata_normalize_discovery_date():
fields_copy = {**_common_metadata_fields}
truncated_date = fields_copy.pop("discovery_date")
assert truncated_date.microsecond == 0
    # Check that a wrongly-typed discovery_date raises a TypeError, even though
    # attrs_strict's type_validator was removed
with pytest.raises(TypeError):
RawExtrinsicMetadata(
target=_content_swhid, discovery_date="not a datetime", **fields_copy
)
# Check for truncation to integral second
date_with_us = truncated_date.replace(microsecond=42)
md = RawExtrinsicMetadata(
target=_content_swhid,
discovery_date=date_with_us,
**fields_copy,
)
assert md.discovery_date == truncated_date
assert md.discovery_date.tzinfo == datetime.timezone.utc
# Check that the timezone gets normalized. Timezones can be offset by a
# non-integral number of seconds, so we need to handle that.
timezone = datetime.timezone(offset=datetime.timedelta(hours=2))
date_with_tz = truncated_date.astimezone(timezone)
assert date_with_tz.tzinfo != datetime.timezone.utc
md = RawExtrinsicMetadata(
target=_content_swhid,
discovery_date=date_with_tz,
**fields_copy,
)
assert md.discovery_date == truncated_date
assert md.discovery_date.tzinfo == datetime.timezone.utc
def test_revision_repr():
from swh.model.model import RevisionType # noqa
revision = Revision.from_dict(revision_example)
rev_repr = repr(revision)
assert rev_repr == (
"Revision(message=b'Linux 4.2-rc2\\n', "
"author=Person(fullname=b'Linus Torvalds <torvalds@linux-foundation.org>', "
"name=b'Linus Torvalds', email=b'torvalds@linux-foundation.org'), "
"committer=Person(fullname=b'Linus Torvalds <torvalds@linux-foundation.org>', "
"name=b'Linus Torvalds', email=b'torvalds@linux-foundation.org'), "
"date=TimestampWithTimezone(timestamp=Timestamp(seconds=1436739030, microseconds=0), "
"offset_bytes=b'-0700'), "
"committer_date=TimestampWithTimezone(timestamp=Timestamp(seconds=1436739030, "
"microseconds=0), offset_bytes=b'-0700'), "
"type=RevisionType.GIT, "
"directory=hash_to_bytes('85a74718d377195e1efd0843ba4f3260bad4fe07'), "
"synthetic=False, metadata=None, "
"parents=(hash_to_bytes('01e2d0627a9a6edb24c37db45db5ecb31e9de808'),), "
"id=hash_to_bytes('bc0195aad0daa2ad5b0d76cce22b167bc3435590'), "
"extra_headers=(), raw_manifest=None)"
)
assert eval(rev_repr) == revision
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import attr
import pytest
from swh.model.model import ModelObjectType
from swh.model.tests.swh_model_data import TEST_OBJECTS
@pytest.mark.parametrize("object_type, objects", TEST_OBJECTS.items())
def test_swh_model_data(object_type, objects):
"""checks model objects in swh_model_data are in correct shape"""
assert objects
for obj in objects:
assert obj.object_type == object_type
attr.validate(obj)
@pytest.mark.parametrize(
"object_type",
(
ModelObjectType.DIRECTORY,
ModelObjectType.REVISION,
ModelObjectType.RELEASE,
ModelObjectType.SNAPSHOT,
),
)
def test_swh_model_data_hash(object_type):
for obj in TEST_OBJECTS[object_type]:
assert (
obj.compute_hash() == obj.id
), f"{obj.compute_hash().hex()} != {obj.id.hex()}"
def test_ensure_visit_status_date_consistency():
"""ensure origin-visit-status dates are more recent than their visit counterpart
The origin-visit-status dates needs to be shifted slightly in the future from their
visit dates counterpart. Otherwise, we are hitting storage-wise the "on conflict"
ignore policy (because origin-visit-add creates an origin-visit-status with the same
parameters from the origin-visit {origin, visit, date}...
"""
visits = TEST_OBJECTS[ModelObjectType.ORIGIN_VISIT]
visit_statuses = TEST_OBJECTS[ModelObjectType.ORIGIN_VISIT_STATUS]
for visit, visit_status in zip(visits, visit_statuses):
assert visit.origin == visit_status.origin
assert visit.visit == visit_status.visit
assert visit.date < visit_status.date
def test_ensure_visit_status_snapshot_consistency():
"""ensure origin-visit-status snapshots exist in the test dataset"""
snapshots = [snp.id for snp in TEST_OBJECTS[ModelObjectType.SNAPSHOT]]
for visit_status in TEST_OBJECTS[ModelObjectType.ORIGIN_VISIT_STATUS]:
if visit_status.snapshot:
assert visit_status.snapshot in snapshots
# Copyright (C) 2015-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import itertools
import attr
import pytest
from swh.model.exceptions import ValidationError
from swh.model.hashutil import hash_to_bytes as _x
from swh.model.swhids import (
SWHID_QUALIFIERS,
CoreSWHID,
ExtendedObjectType,
ExtendedSWHID,
ObjectType,
QualifiedSWHID,
)
dummy_qualifiers = {"origin": "https://example.com", "lines": "42"}
# SWHIDs that are outright invalid, no matter the context
INVALID_SWHIDS = [
"swh:1:cnt",
"swh:1:",
"swh:",
"swh:1:cnt:",
"foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed",
"swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
"swh:1:snp:foo",
# wrong qualifier: ori should be origin
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa
# wrong qualifier: anc should be anchor
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anc=1;visit=1;path=/", # noqa
# wrong qualifier: vis should be visit
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;vis=1;path=/", # noqa
# wrong qualifier: pa should be path
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;visit=1;pa=/", # noqa
# wrong qualifier: line should be lines
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;line=10;origin=something;anchor=1;visit=1;path=/", # noqa
    # wrong qualifier value: it contains a space before or after
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin= https://some-url", # noqa
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=some-anchor ", # noqa
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=some-anchor ;visit=1", # noqa
# invalid swhid: whitespaces
"swh :1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa
"swh: 1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa
"swh: 1: dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa
"swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d",
"swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d; origin=blah",
"swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12",
# other whitespaces
"swh\t:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12",
"swh:1\n:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12",
"swh:1:\rdir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12",
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d\f;lines=12",
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12\v",
]
SWHID_CLASSES = [CoreSWHID, QualifiedSWHID, ExtendedSWHID]
@pytest.mark.parametrize(
"invalid_swhid,swhid_class", itertools.product(INVALID_SWHIDS, SWHID_CLASSES)
)
def test_swhid_parsing_error(invalid_swhid, swhid_class):
"""Tests SWHID strings that are invalid for all SWHID classes do raise
a ValidationError"""
with pytest.raises(ValidationError):
swhid_class.from_string(invalid_swhid)
# string SWHIDs, and how they should be parsed by each of the classes,
# or None if the class does not support it
HASH = "94a9ed024d3859793618152ea559a168bbcbb5e2"
VALID_SWHIDS = [
(
f"swh:1:cnt:{HASH}",
CoreSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
),
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
),
ExtendedSWHID(
object_type=ExtendedObjectType.CONTENT,
object_id=_x(HASH),
),
),
(
f"swh:1:dir:{HASH}",
CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=_x(HASH),
),
QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=_x(HASH),
),
ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY,
object_id=_x(HASH),
),
),
(
f"swh:1:rev:{HASH}",
CoreSWHID(
object_type=ObjectType.REVISION,
object_id=_x(HASH),
),
QualifiedSWHID(
object_type=ObjectType.REVISION,
object_id=_x(HASH),
),
ExtendedSWHID(
object_type=ExtendedObjectType.REVISION,
object_id=_x(HASH),
),
),
(
f"swh:1:rel:{HASH}",
CoreSWHID(
object_type=ObjectType.RELEASE,
object_id=_x(HASH),
),
QualifiedSWHID(
object_type=ObjectType.RELEASE,
object_id=_x(HASH),
),
ExtendedSWHID(
object_type=ExtendedObjectType.RELEASE,
object_id=_x(HASH),
),
),
(
f"swh:1:snp:{HASH}",
CoreSWHID(
object_type=ObjectType.SNAPSHOT,
object_id=_x(HASH),
),
QualifiedSWHID(
object_type=ObjectType.SNAPSHOT,
object_id=_x(HASH),
),
ExtendedSWHID(
object_type=ExtendedObjectType.SNAPSHOT,
object_id=_x(HASH),
),
),
(
f"swh:1:cnt:{HASH};origin=https://github.com/python/cpython;lines=1-18",
None, # CoreSWHID does not allow qualifiers
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
origin="https://github.com/python/cpython",
lines=(1, 18),
),
None, # Neither does ExtendedSWHID
),
(
f"swh:1:cnt:{HASH};origin=https://github.com/python/cpython;lines=1-18/",
None, # likewise
None,
None, # likewise
),
(
f"swh:1:cnt:{HASH};origin=https://github.com/python/cpython;lines=18",
None, # likewise
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
origin="https://github.com/python/cpython",
lines=(18, None),
),
None, # likewise
),
(
f"swh:1:dir:{HASH};origin=deb://Debian/packages/linuxdoc-tools",
None, # likewise
QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=_x(HASH),
origin="deb://Debian/packages/linuxdoc-tools",
),
None, # likewise
),
(
f"swh:1:ori:{HASH}",
None, # CoreSWHID does not allow origin pseudo-SWHIDs
None, # Neither does QualifiedSWHID
ExtendedSWHID(
object_type=ExtendedObjectType.ORIGIN,
object_id=_x(HASH),
),
),
(
f"swh:1:emd:{HASH}",
None, # likewise for metadata pseudo-SWHIDs
None, # Neither does QualifiedSWHID
ExtendedSWHID(
object_type=ExtendedObjectType.RAW_EXTRINSIC_METADATA,
object_id=_x(HASH),
),
),
(
f"swh:1:emd:{HASH};origin=https://github.com/python/cpython",
None, # CoreSWHID does not allow metadata pseudo-SWHIDs or qualifiers
None, # QualifiedSWHID does not allow metadata pseudo-SWHIDs
None, # ExtendedSWHID does not allow qualifiers
),
]
@pytest.mark.parametrize(
"string,core,qualified,extended",
[
pytest.param(string, core, qualified, extended, id=string)
for (string, core, qualified, extended) in VALID_SWHIDS
],
)
def test_parse_unparse_swhids(string, core, qualified, extended):
"""Tests parsing and serializing valid SWHIDs with the various SWHID classes."""
classes = [CoreSWHID, QualifiedSWHID, ExtendedSWHID]
for cls, parsed_swhid in zip(classes, [core, qualified, extended]):
if parsed_swhid is None:
# This class should not accept this SWHID
with pytest.raises(ValidationError) as excinfo:
cls.from_string(string)
# Check string serialization for exception
assert str(excinfo.value) is not None
else:
            # This class should accept this SWHID
assert cls.from_string(string) == parsed_swhid
# Also check serialization
assert string == str(parsed_swhid)
@pytest.mark.parametrize(
"core,extended",
[
pytest.param(core, extended, id=string)
for (string, core, qualified, extended) in VALID_SWHIDS
if core is not None
],
)
def test_core_to_extended(core, extended):
assert core.to_extended() == extended
@pytest.mark.parametrize(
"core,qualified",
[
pytest.param(core, qualified, id=string)
for (string, core, qualified, extended) in VALID_SWHIDS
if core is not None
],
)
def test_core_to_qualified(core, qualified):
assert core.to_qualified() == qualified
@pytest.mark.parametrize(
"ns,version,type,id,qualifiers",
[
("foo", 1, ObjectType.CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505", {}),
("swh", 2, ObjectType.CONTENT, "def8bc9d7a6bcf6db04f476d29314f157507d505", {}),
("swh", 1, ObjectType.DIRECTORY, "aaaa", {}),
],
)
def test_QualifiedSWHID_validation_error(ns, version, type, id, qualifiers):
with pytest.raises(ValidationError):
QualifiedSWHID(
namespace=ns,
scheme_version=version,
object_type=type,
object_id=_x(id),
**qualifiers,
)
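# Triples of (object type, qualifier kwargs, expected result); the expected result
# is either the serialized SWHID string or the exception type the constructor
# should raise.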
QSWHID_EXPECTED = [
# No qualifier:
(ObjectType.CONTENT, {}, f"swh:1:cnt:{HASH}"),
# origin:
(ObjectType.CONTENT, {"origin": None}, f"swh:1:cnt:{HASH}"),
(ObjectType.CONTENT, {"origin": 42}, ValueError),
# visit:
(
ObjectType.CONTENT,
{"visit": f"swh:1:snp:{HASH}"},
f"swh:1:cnt:{HASH};visit=swh:1:snp:{HASH}",
),
(
ObjectType.CONTENT,
{"visit": CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=_x(HASH))},
f"swh:1:cnt:{HASH};visit=swh:1:snp:{HASH}",
),
(ObjectType.CONTENT, {"visit": 42}, TypeError),
(
ObjectType.CONTENT,
{"visit": f"swh:1:rel:{HASH}"},
ValidationError,
),
(
ObjectType.CONTENT,
{"visit": CoreSWHID(object_type=ObjectType.RELEASE, object_id=_x(HASH))},
ValidationError,
),
# anchor:
(
ObjectType.CONTENT,
{"anchor": f"swh:1:snp:{HASH}"},
f"swh:1:cnt:{HASH};anchor=swh:1:snp:{HASH}",
),
(
ObjectType.CONTENT,
{"anchor": CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=_x(HASH))},
f"swh:1:cnt:{HASH};anchor=swh:1:snp:{HASH}",
),
(
ObjectType.CONTENT,
{"anchor": f"swh:1:dir:{HASH}"},
f"swh:1:cnt:{HASH};anchor=swh:1:dir:{HASH}",
),
(
ObjectType.CONTENT,
{"anchor": CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=_x(HASH))},
f"swh:1:cnt:{HASH};anchor=swh:1:dir:{HASH}",
),
(ObjectType.CONTENT, {"anchor": 42}, TypeError),
(
ObjectType.CONTENT,
{"anchor": f"swh:1:cnt:{HASH}"},
ValidationError,
),
(
ObjectType.CONTENT,
{"anchor": CoreSWHID(object_type=ObjectType.CONTENT, object_id=_x(HASH))},
ValidationError,
),
# path:
(
ObjectType.CONTENT,
{"path": b"/foo"},
f"swh:1:cnt:{HASH};path=/foo",
),
(
ObjectType.CONTENT,
{"path": b"/foo;bar"},
f"swh:1:cnt:{HASH};path=/foo%3Bbar",
),
(
ObjectType.CONTENT,
{"path": "/foo"},
f"swh:1:cnt:{HASH};path=/foo",
),
(
ObjectType.CONTENT,
{"path": "/foo;bar"},
f"swh:1:cnt:{HASH};path=/foo%3Bbar",
),
(ObjectType.CONTENT, {"path": 42}, Exception),
# lines:
(
ObjectType.CONTENT,
{"lines": (42, None)},
f"swh:1:cnt:{HASH};lines=42",
),
(
ObjectType.CONTENT,
{"lines": (21, 42)},
f"swh:1:cnt:{HASH};lines=21-42",
),
(
ObjectType.CONTENT,
{"lines": 42},
TypeError,
),
(
ObjectType.CONTENT,
{"lines": (None, 42)},
ValueError,
),
(
ObjectType.CONTENT,
{"lines": ("42", None)},
ValueError,
),
]
@pytest.mark.parametrize("object_type,qualifiers,expected", QSWHID_EXPECTED)
def test_QualifiedSWHID_init(object_type, qualifiers, expected):
"""Tests validation and converters of qualifiers"""
if isinstance(expected, type):
assert issubclass(expected, Exception)
with pytest.raises(expected):
QualifiedSWHID(object_type=object_type, object_id=_x(HASH), **qualifiers)
else:
assert isinstance(expected, str)
swhid = QualifiedSWHID(
object_type=object_type, object_id=_x(HASH), **qualifiers
)
        # Check the built object has the right serialization
assert expected == str(swhid)
# Check the internal state of the object is the same as if parsed from a string
assert QualifiedSWHID.from_string(expected) == swhid
@pytest.mark.parametrize(
"object_type,qualifiers",
[
(type_, dict_)
for (type_, dict_, str_or_exc) in QSWHID_EXPECTED
if isinstance(str_or_exc, str)
],
)
def test_QualifiedSWHID_to_dict(object_type, qualifiers):
qswhid = QualifiedSWHID(object_type=object_type, object_id=_x(HASH), **qualifiers)
d = qswhid.to_dict()
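    # to_dict() flattens the object into a "swhid" entry (the core SWHID string)
    # plus one entry per set qualifier; rebuilding from those parts must give back
    # an equal object.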
swhid = CoreSWHID.from_string(d.pop("swhid"))
other = QualifiedSWHID(
object_type=swhid.object_type, object_id=swhid.object_id, **d
)
assert qswhid == other
def test_QualifiedSWHID_hash():
object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
assert hash(
QualifiedSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)
) == hash(QualifiedSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id))
assert hash(
QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
**dummy_qualifiers,
)
) == hash(
QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
**dummy_qualifiers,
)
)
    # Passing the qualifiers in a different keyword order must not change the hash.
assert hash(
QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
origin="https://example.com",
lines=(42, None),
)
) == hash(
QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
lines=(42, None),
origin="https://example.com",
)
)
def test_QualifiedSWHID_eq():
object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
assert QualifiedSWHID(
object_type=ObjectType.DIRECTORY, object_id=object_id
) == QualifiedSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)
assert QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
**dummy_qualifiers,
) == QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
**dummy_qualifiers,
)
assert QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
**dummy_qualifiers,
) == QualifiedSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
**dummy_qualifiers,
)
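# Pairs of (SWHID string, expected QualifiedSWHID), with None when parsing the
# string should raise a ValidationError.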
QUALIFIED_SWHIDS = [
# origin:
(
f"swh:1:cnt:{HASH};origin=https://github.com/python/cpython",
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
origin="https://github.com/python/cpython",
),
),
(
f"swh:1:cnt:{HASH};origin=https://example.org/foo%3Bbar%25baz",
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
origin="https://example.org/foo;bar%baz",
),
),
(
f"swh:1:cnt:{HASH};origin=https://example.org?project=test",
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
origin="https://example.org?project=test",
),
),
# visit:
(
f"swh:1:cnt:{HASH};visit=swh:1:snp:{HASH}",
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=_x(HASH)),
),
),
(
f"swh:1:cnt:{HASH};visit=swh:1:rel:{HASH}",
None,
),
# anchor:
(
f"swh:1:cnt:{HASH};anchor=swh:1:dir:{HASH}",
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
anchor=CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=_x(HASH)),
),
),
(
f"swh:1:cnt:{HASH};anchor=swh:1:rev:{HASH}",
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
anchor=CoreSWHID(object_type=ObjectType.REVISION, object_id=_x(HASH)),
),
),
(
f"swh:1:cnt:{HASH};anchor=swh:1:cnt:{HASH}",
None, # 'cnt' is not valid in anchor
),
(
f"swh:1:cnt:{HASH};anchor=swh:1:ori:{HASH}",
None, # 'ori' is not valid in a CoreSWHID
),
# path:
(
f"swh:1:cnt:{HASH};path=/foo",
QualifiedSWHID(
object_type=ObjectType.CONTENT, object_id=_x(HASH), path=b"/foo"
),
),
(
f"swh:1:cnt:{HASH};path=/foo%3Bbar",
QualifiedSWHID(
object_type=ObjectType.CONTENT, object_id=_x(HASH), path=b"/foo;bar"
),
),
(
f"swh:1:cnt:{HASH};path=/foo%25bar",
QualifiedSWHID(
object_type=ObjectType.CONTENT, object_id=_x(HASH), path=b"/foo%bar"
),
),
(
f"swh:1:cnt:{HASH};path=/foo/bar%3Dbaz",
QualifiedSWHID(
object_type=ObjectType.CONTENT, object_id=_x(HASH), path=b"/foo/bar=baz"
),
),
# lines
(
f"swh:1:cnt:{HASH};lines=1-18",
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
lines=(1, 18),
),
),
(
f"swh:1:cnt:{HASH};lines=18",
QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
lines=(18, None),
),
),
(
f"swh:1:cnt:{HASH};lines=",
None,
),
(
f"swh:1:cnt:{HASH};lines=aa",
None,
),
(
f"swh:1:cnt:{HASH};lines=18-aa",
None,
),
]
@pytest.mark.parametrize("string,parsed", QUALIFIED_SWHIDS)
def test_QualifiedSWHID_parse_serialize_qualifiers(string, parsed):
"""Tests parsing and serializing valid SWHIDs with the various SWHID classes."""
if parsed is None:
with pytest.raises(ValidationError):
print(repr(QualifiedSWHID.from_string(string)))
else:
assert QualifiedSWHID.from_string(string) == parsed
assert str(parsed) == string
def test_QualifiedSWHID_deserialize_origin_extra_escapes():
"""Checks that semicolon in origins are escaped."""
string = f"swh:1:cnt:{HASH};origin=https://example.org/foo%3Bbar%25baz"
swhid = QualifiedSWHID(
object_type=ObjectType.CONTENT,
object_id=_x(HASH),
origin="https://example.org/foo;bar%baz",
)
assert QualifiedSWHID.from_string(string) == swhid
def test_QualifiedSWHID_attributes():
"""Checks the set of QualifiedSWHID attributes match the SWHID_QUALIFIERS
constant."""
assert set(attr.fields_dict(QualifiedSWHID)) == {
"namespace",
"scheme_version",
"object_type",
"object_id",
*SWHID_QUALIFIERS,
}
@pytest.mark.parametrize(
"ns,version,type,id",
[
("foo", 1, ObjectType.CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505"),
("swh", 2, ObjectType.CONTENT, "def8bc9d7a6bcf6db04f476d29314f157507d505"),
("swh", 1, ObjectType.DIRECTORY, "aaaa"),
],
)
def test_CoreSWHID_validation_error(ns, version, type, id):
with pytest.raises(ValidationError):
CoreSWHID(
namespace=ns,
scheme_version=version,
object_type=type,
object_id=_x(id),
)
def test_CoreSWHID_hash():
object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
assert hash(
CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)
) == hash(CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id))
assert hash(
CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
)
) == hash(
CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
)
)
    # Two separately-constructed identical SWHIDs must hash equal.
assert hash(
CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
)
) == hash(
CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
)
)
def test_CoreSWHID_eq():
object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
assert CoreSWHID(
object_type=ObjectType.DIRECTORY, object_id=object_id
) == CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id)
assert CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
) == CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
)
assert CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
) == CoreSWHID(
object_type=ObjectType.DIRECTORY,
object_id=object_id,
)
@pytest.mark.parametrize(
"ns,version,type,id",
[
(
"foo",
1,
ExtendedObjectType.CONTENT,
"abc8bc9d7a6bcf6db04f476d29314f157507d505",
),
(
"swh",
2,
ExtendedObjectType.CONTENT,
"def8bc9d7a6bcf6db04f476d29314f157507d505",
),
("swh", 1, ExtendedObjectType.DIRECTORY, "aaaa"),
],
)
def test_ExtendedSWHID_validation_error(ns, version, type, id):
with pytest.raises(ValidationError):
ExtendedSWHID(
namespace=ns,
scheme_version=version,
object_type=type,
object_id=_x(id),
)
def test_ExtendedSWHID_hash():
object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
assert hash(
ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id)
) == hash(
ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id)
)
assert hash(
ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY,
object_id=object_id,
)
) == hash(
ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY,
object_id=object_id,
)
)
    # Two separately-constructed identical SWHIDs must hash equal.
assert hash(
ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY,
object_id=object_id,
)
) == hash(
ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY,
object_id=object_id,
)
)
def test_ExtendedSWHID_eq():
object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
assert ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY, object_id=object_id
) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id)
assert ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY,
object_id=object_id,
) == ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY,
object_id=object_id,
)
assert ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY,
object_id=object_id,
) == ExtendedSWHID(
object_type=ExtendedObjectType.DIRECTORY,
object_id=object_id,
)
def test_object_types():
"""Checks ExtendedObjectType is a superset of ObjectType"""
for member in ObjectType:
assert getattr(ExtendedObjectType, member.name).value == member.value
Returns:
True if the revision log is topologically sorted.
"""
rev_by_id = {r["id"]: r for r in revision_log}
def all_parents(revision):
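        # Yield every ancestor of `revision`, following parents transitively.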
for parent in revision["parents"]:
yield parent
yield from all_parents(rev_by_id[parent])
visited = set()
for rev in revision_log:
visited.add(rev["id"])
if not all(parent in visited for parent in all_parents(rev)):
return False
return True
class TestToposort(unittest.TestCase):
def generate_log(self, graph):
for node_id, parents in graph.items():
yield {"id": node_id, "parents": tuple(parents)}
def unordered_log(self, log):
return {(d["id"], tuple(d["parents"])) for d in log}
def check(self, graph):
log = list(self.generate_log(graph))
self.assertTrue(is_toposorted_slow(toposort(log)))
def test_linked_list(self):
self.check({3: [2], 2: [1], 1: []})
def test_fork(self):
self.check({7: [6], 6: [4], 5: [3], 4: [2], 3: [2], 2: [1], 1: []})
def test_fork_merge(self):
self.check({8: [7, 5], 7: [6], 6: [4], 5: [3], 4: [2], 3: [2], 2: [1], 1: []})
def test_two_origins(self):
self.check({9: [8], 8: [7, 5], 7: [6], 6: [4], 5: [3], 4: [], 3: []})
def test_three_way(self):
self.check(
{
9: [8, 4, 2],
8: [7, 5],
7: [6],
6: [4],
5: [3],
4: [2],
3: [2],
2: [1],
1: [],
}
)