Skip to content
Snippets Groups Projects
Commit b064a0bb authored by David Douard
Browse files

Add a test data generator module

Currently provides two main generators:
- gen_origins()
- gen_contents()
parent 75645964
No related branches found
Tags v0.0.50
No related merge requests found
Click
dulwich
pytest
pytz
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime
from pytz import all_timezones, timezone
from random import choice, randint, random, shuffle
from typing import List, Dict
from swh.model.hashutil import MultiHash
# Building blocks for the origin URLs: gen_all_origins() combines every
# protocol with every domain and every path.
PROTOCOLS = [
    'git',
    'http',
    'https',
    'deb',
    'svn',
    'mock',
]
DOMAINS = [
    'example.com',
    'some.long.host.name',
    'xn--n28h.tld',
]
PATHS = [
    '',
    '/',
    '/stuff',
    '/stuff/',
    '/path/to/resource',
    '/path/with/anchor#id=42',
    '/path/with/qargs?q=1&b',
]

# Status values gen_content() picks from; 'absent' gets special handling
# there (a 'reason' entry is added and the data is emptied).
CONTENT_STATUS = [
    'visible',
    'hidden',
    'absent',
]

# Upper bound (in seconds since the epoch) for generated content ctimes.
MAX_DATE = 3e9  # around 2065
def gen_all_origins():
    """Yield one origin dict per protocol/domain/path combination.

    Each yielded dict holds a single ``'url'`` entry. PROTOCOLS drives the
    outermost iteration and PATHS the innermost one.
    """
    yield from (
        {'url': '%s://%s%s' % (scheme, host, path)}
        for scheme in PROTOCOLS
        for host in DOMAINS
        for path in PATHS
    )


# Every possible origin, built once when the module is imported.
ORIGINS = list(gen_all_origins())
def gen_origins(n: int = 100) -> List:
    """Return up to ``n`` origins picked in random order.

    The result is a shuffled copy of ORIGINS cut down to its first ``n``
    entries, so it never holds more than ``len(ORIGINS)`` items and ORIGINS
    itself is left untouched. The returned dicts are suitable for using as
    Storage.add_origin() argument.
    """
    pool = list(ORIGINS)
    shuffle(pool)
    return pool[:n]
def gen_content():
    """Build one randomly generated content dict.

    The dict carries random ``data`` of 1 to 10240 bytes, a ``status`` drawn
    from CONTENT_STATUS, the ``length`` of the generated data, a timezone-aware
    ``ctime`` and the entries returned by ``MultiHash.digest()`` for that
    data.

    For the 'absent' status a ``reason`` entry is added and ``data`` is
    replaced by empty bytes; ``length`` and the digests still describe the
    data that was generated.
    """
    length = randint(1, 10 * 1024)
    payload = bytes(randint(0, 255) for _ in range(length))
    status = choice(CONTENT_STATUS)
    hasher = MultiHash.from_data(payload)

    # Draw the timestamp before the timezone, so the random generator is
    # consumed in a fixed order.
    timestamp = random() * MAX_DATE
    tz = timezone(choice(all_timezones))

    content = {
        'data': payload,
        'status': status,
        'length': length,
        'ctime': datetime.fromtimestamp(timestamp, tz),
    }
    content.update(hasher.digest())

    if status == 'absent':
        content.update({'reason': 'why not', 'data': b''})
    return content
def gen_contents(n=20) -> List[Dict]:
    """Return a list of ``n`` content dicts, each built by gen_content().

    The dicts are suitable for using as Storage.content_add() argument.
    """
    return [gen_content() for _ in range(n)]
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from .generate_testdata import gen_contents, gen_origins, ORIGINS
from swh.model.model import Origin, Content
def test_gen_origins_empty():
    """Requesting zero origins gives back an empty result."""
    assert not gen_origins(0)
def test_gen_origins_one():
    """A single requested origin is returned and loads as an Origin model."""
    generated = gen_origins(1)
    assert len(generated) == 1
    # building the models checks that each dict is accepted by Origin.from_dict
    assert [Origin.from_dict(entry) for entry in generated]
def test_gen_origins_default():
    """Without argument, 100 origins with pairwise distinct urls come back."""
    generated = gen_origins()
    assert len(generated) == 100
    # a set drops repeated urls, so equal sizes mean every url is unique
    unique_urls = {Origin.from_dict(entry).url for entry in generated}
    assert len(generated) == len(unique_urls)
def test_gen_origins_max():
    """Asking for more origins than exist returns each origin exactly once."""
    available = len(ORIGINS)
    generated = gen_origins(available + 1)
    assert len(generated) == available
    # ensure we did not generate the same origin twice
    unique_urls = {Origin.from_dict(entry).url for entry in generated}
    assert len(generated) == len(unique_urls)
def test_gen_contents_empty():
    """Requesting zero contents gives back an empty result."""
    assert not gen_contents(0)
def test_gen_contents_one():
    """A single requested content is returned and loads as a Content model."""
    generated = gen_contents(1)
    assert len(generated) == 1
    # building the models checks that each dict is accepted by Content.from_dict
    assert [Content.from_dict(entry) for entry in generated]
def test_gen_contents_default():
    """Without argument, 20 pairwise distinct contents come back."""
    generated = gen_contents()
    assert len(generated) == 20
    # ensure we did not generate the same content twice
    unique_models = {Content.from_dict(entry) for entry in generated}
    assert len(generated) == len(unique_models)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment