Skip to content
Snippets Groups Projects
Commit 73826fc3 authored by Jenkins for Software Heritage's avatar Jenkins for Software Heritage
Browse files

New upstream version 0.0.46

parents ab4aedb5 5b652b30
No related branches found
Tags debian/upstream/0.0.46
No related merge requests found
Metadata-Version: 2.1
Name: swh.lister
Version: 0.0.45
Version: 0.0.46
Summary: Software Heritage lister
Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/
Author: Software Heritage developers
......
Metadata-Version: 2.1
Name: swh.lister
Version: 0.0.45
Version: 0.0.46
Summary: Software Heritage lister
Home-page: https://forge.softwareheritage.org/diffusion/DLSGH/
Author: Software Heritage developers
......
......@@ -91,6 +91,7 @@ swh/lister/debian/tasks.py
swh/lister/debian/utils.py
swh/lister/debian/tests/__init__.py
swh/lister/debian/tests/conftest.py
swh/lister/debian/tests/test_init.py
swh/lister/debian/tests/test_lister.py
swh/lister/debian/tests/test_models.py
swh/lister/debian/tests/test_tasks.py
......
......@@ -16,6 +16,7 @@ from sqlalchemy.orm import sessionmaker
from typing import Any, Dict, Type, Union
from swh.core import config
from swh.core.utils import grouper
from swh.scheduler import get_scheduler, utils
from .abstractattribute import AbstractAttribute
......@@ -468,11 +469,12 @@ class ListerBase(abc.ABC, config.SWHConfig):
task_dict = self.task_dict(**m)
tasks[_task_key(task_dict)] = (ir, m, task_dict)
new_tasks = self.scheduler.create_tasks(
(task_dicts for (_, _, task_dicts) in tasks.values()))
for task in new_tasks:
ir, m, _ = tasks[_task_key(task)]
ir.task_id = task['id']
gen_tasks = (task_dicts for (_, _, task_dicts) in tasks.values())
for grouped_tasks in grouper(gen_tasks, n=1000):
new_tasks = self.scheduler.create_tasks(list(grouped_tasks))
for task in new_tasks:
ir, m, _ = tasks[_task_key(task)]
ir.task_id = task['id']
def ingest_data(self, identifier, checks=False):
"""The core data fetch sequence. Request server endpoint. Simplify and
......
......@@ -3,46 +3,62 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
from typing import Any, List, Mapping
logger = logging.getLogger(__name__)
def debian_init(db_engine,
                override_conf: Mapping[str, Any] = {},
                distribution_name: str = 'Debian',
                suites: List[str] = ['stretch', 'buster', 'bullseye'],
                components: List[str] = ['main', 'contrib', 'non-free']):
    """Initialize the debian data model.

    Make sure a ``Distribution`` named ``distribution_name`` exists, then
    register one ``Area`` named ``<suite>/<component>`` for every
    combination of ``suites`` and ``components`` that is not already
    attached to that distribution. Areas that already exist are left
    untouched, so repeating a call with the same arguments adds nothing.

    The default arguments are mutable objects but are only read here, never
    modified.

    Args:
        db_engine: SQLAlchemy manipulation database object
        override_conf: Override conf to pass to instantiate a lister
            (not referenced by this function's body)
        distribution_name: Distribution to initialize
        suites: Default suites to register with the lister
        components: Default components to register per suite

    """
    from swh.lister.debian.models import Distribution, Area
    from sqlalchemy.orm import sessionmaker

    db_session = sessionmaker(bind=db_engine)()
    try:
        distrib = db_session.query(Distribution) \
            .filter(Distribution.name == distribution_name) \
            .one_or_none()

        if distrib is None:
            distrib = Distribution(
                name=distribution_name, type='deb',
                mirror_uri='http://deb.debian.org/debian/'
            )
            db_session.add(distrib)

        # Collect the names of the areas already attached to the distribution
        existing_area = {
            area.name
            for area in db_session.query(Area)
            .filter(Area.distribution == distrib)
            .all()
        }

        # Sorted so the log line does not depend on set iteration order
        logger.debug('Area already known: %s',
                     ', '.join(sorted(existing_area)))

        # Create only the new ones
        for suite in suites:
            for component in components:
                area_name = f'{suite}/{component}'
                if area_name in existing_area:
                    logger.debug("Area '%s' already set, skipping", area_name)
                    continue
                db_session.add(Area(name=area_name, distribution=distrib))

        db_session.commit()
    finally:
        # Always release the session, even when a query or the commit fails
        db_session.close()
......
......@@ -19,9 +19,9 @@ def lister_debian(swh_listers):
lister = swh_listers['debian']
# Initialize the debian data model
debian_init(lister.db_engine,
distributions=['stretch'],
area_names=['main', 'contrib'])
debian_init(
lister.db_engine, suites=['stretch'], components=['main', 'contrib']
)
# Add the load-deb-package in the scheduler backend
lister.scheduler.create_task_type({
......
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
from swh.lister.debian import debian_init
from swh.lister.debian.models import Distribution, Area
@pytest.fixture
def engine(session):
    """Return the object the test ``session`` is bound to.

    Autoflush is switched off on the session before its bind is handed out.
    The bind is presumably an SQLAlchemy engine, since the tests pass it to
    ``debian_init`` as ``db_engine`` -- confirm against the ``session``
    fixture.
    """
    session.autoflush = False
    bound = session.bind
    return bound
def test_debian_init_step(engine, session):
    """Check debian_init creates the model, is idempotent and is additive.

    Three steps are exercised against an initially empty model: a first
    call creating the distribution and its areas, an identical second call
    that must not duplicate anything, and a third call with a new suite
    that must only add the areas of that suite.
    """
    distribution_name = 'KaliLinux'
    suites = ['wheezy', 'jessie']
    components = ['main', 'contrib']

    # Precondition: neither the distribution nor any area exists yet
    distrib = session.query(Distribution) \
        .filter(Distribution.name == distribution_name) \
        .one_or_none()
    assert distrib is None
    assert session.query(Area).all() == []

    debian_init(engine, distribution_name=distribution_name,
                suites=suites, components=components)

    distrib = session.query(Distribution) \
        .filter(Distribution.name == distribution_name) \
        .one_or_none()
    assert distrib is not None
    assert distrib.name == distribution_name
    assert distrib.type == 'deb'
    assert distrib.mirror_uri == 'http://deb.debian.org/debian/'

    expected_area_names = {
        f'{suite}/{component}'
        for suite in suites
        for component in components
    }

    all_area = session.query(Area).all()
    assert len(all_area) == 2 * 2, "2 suites * 2 components per suite"
    for area in all_area:
        assert area.distribution == distrib
    assert {area.name for area in all_area} == expected_area_names

    # check idempotency (on exact same call)
    debian_init(engine, distribution_name=distribution_name,
                suites=suites, components=components)

    distribs = session.query(Distribution) \
        .filter(Distribution.name == distribution_name) \
        .all()
    assert len(distribs) == 1

    all_area = session.query(Area).all()
    assert len(all_area) == 2 * 2, "2 suites * 2 components per suite"
    assert {area.name for area in all_area} == expected_area_names

    # Add a new suite: only its areas must be appended to the known ones
    debian_init(engine, distribution_name=distribution_name,
                suites=['lenny'], components=components)
    expected_area_names |= {f'lenny/{component}' for component in components}

    all_area = [a.name for a in session.query(Area).all()]
    assert len(all_area) == (2 + 1) * 2, "3 suites * 2 components per suite"
    assert set(all_area) == expected_area_names
v0.0.45-0-g5ab9d67
\ No newline at end of file
v0.0.46-0-g5b652b3
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment