Skip to content
Snippets Groups Projects
Commit 6299c091 authored by Franck Bret's avatar Franck Bret
Browse files

Puppet: The puppet loader loads origins from https://forge.puppet.com

For each origin it takes advantage of 'artifacts' data sent through
'extra_loader_arguments' from the Puppet lister, providing versions,
archive url, last_update, filename.
Author and description are extracted from intrinsic metadata.

Related T4580
parent 2db1a754
No related branches found
No related tags found
1 merge request!325Puppet: The puppet loader loads origins from https://forge.puppet.com
......@@ -149,6 +149,15 @@ Here is an overview of the fields (+ internal version name + branch name) used b
- from extrinsic metadata
- from extrinsic metadata
- name, version and description from intrinsic metadata
* - puppet
- ``p_info.​version``
- ``release_name(​version)``
- =version
- Synthetic release for Puppet source package {p_info.name} version {version} {description}
- true
- from intrinsic metadata
- from extrinsic metadata
- version and description from intrinsic metadata
* - pypi
- ``metadata​["version"]``
- ``release_name(​version)`` or ``release_name(​version, filename)``
......
......@@ -68,6 +68,7 @@ setup(
loader.npm=swh.loader.package.npm:register
loader.opam=swh.loader.package.opam:register
loader.pubdev=swh.loader.package.pubdev:register
loader.puppet=swh.loader.package.puppet:register
loader.pypi=swh.loader.package.pypi:register
loader.maven=swh.loader.package.maven:register
""",
......
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Any, Mapping
def register() -> Mapping[str, Any]:
    """Return the registration data of the Puppet loader.

    Returns:
        A mapping with two entries: ``task_modules``, the list of task module
        names (here the ``tasks`` module next to this one), and ``loader``,
        the loader class.
    """
    # The loader class is imported at call time, not at module import time.
    from .loader import PuppetLoader

    tasks_module = f"{__name__}.tasks"
    return {"task_modules": [tasks_module], "loader": PuppetLoader}
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime
import json
from pathlib import Path
from typing import Any, Dict, Iterator, Optional, Sequence, Tuple
import attr
import iso8601
from packaging.version import parse as parse_version
from swh.loader.package.loader import BasePackageInfo, PackageLoader
from swh.loader.package.utils import Person, release_name
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
@attr.s
class PuppetPackageInfo(BasePackageInfo):
    """Package information describing one version of a Puppet module.

    Adds the module name, archive file name, version and last update date to
    the fields inherited from ``BasePackageInfo``.
    """

    name = attr.ib(type=str)
    """Name of the package"""

    filename = attr.ib(type=str)
    """Archive (tar.gz) file name"""

    version = attr.ib(type=str)
    """Current version"""

    last_modified = attr.ib(type=datetime)
    """Module last update date as release date"""
def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
    """Extract intrinsic metadata from metadata.json file at dir_path.

    Each Puppet module version has a metadata.json file at the root of the archive.

    See ``https://puppet.com/docs/puppet/7/modules_metadata.html`` for metadata
    specifications.

    Args:
        dir_path: A directory on disk where a metadata.json file must be present

    Returns:
        The parsed content of metadata.json, as a dict

    Raises:
        FileNotFoundError: if ``dir_path`` holds no metadata.json file
        json.JSONDecodeError: if the file content is not valid JSON
    """
    meta_json_path = dir_path / "metadata.json"
    # JSON is UTF-8 by specification: decode it explicitly rather than with the
    # locale's preferred encoding, which is what a bare ``read_text()`` uses.
    metadata: Dict[str, Any] = json.loads(meta_json_path.read_text(encoding="utf-8"))
    return metadata
class PuppetLoader(PackageLoader[PuppetPackageInfo]):
    """Package loader for Puppet modules (visit type ``puppet``).

    The loader does not look versions up by itself: the known versions and
    their archive details are handed over through the ``artifacts`` argument.
    """

    visit_type = "puppet"

    def __init__(
        self,
        storage: StorageInterface,
        url: str,
        artifacts: Dict[str, Any],
        **kwargs,
    ):
        """Set up the loader for one origin.

        Args:
            storage: Storage instance, forwarded to the parent constructor
            url: Origin url, forwarded to the parent constructor
            artifacts: Mapping from a version string to a dict holding the
              ``url``, ``filename`` and ``last_update`` entries of that version
            **kwargs: Extra arguments forwarded to the parent constructor
        """
        super().__init__(storage=storage, url=url, **kwargs)
        self.url = url
        self.artifacts = artifacts

    def get_versions(self) -> Sequence[str]:
        """List every version of the module found in the artifacts.

        Returns:
            The versions, ordered with ``packaging``'s version parsing so that
            the lowest version comes first

        Example::

            ["0.1.1", "0.10.2"]
        """
        return sorted(self.artifacts, key=parse_version)

    def get_default_version(self) -> str:
        """Pick the highest version of the module.

        Returns:
            The last item of :meth:`get_versions`

        Example::

            "0.10.2"
        """
        ordered_versions = self.get_versions()
        return ordered_versions[-1]

    def get_package_info(self, version: str) -> Iterator[Tuple[str, PuppetPackageInfo]]:
        """Build the release name and package information of a version.

        Args:
            version: Package version (e.g: "0.1.0")

        Returns:
            Iterator yielding a single tuple (release_name, p_info)
        """
        artifact = self.artifacts[version]
        archive_name: str = artifact["filename"]
        archive_suffix = f"-{version}.tar.gz"
        assert archive_name.endswith(archive_suffix)

        # The package name is whatever precedes "-<version>.tar.gz"
        module_name = archive_name.partition(archive_suffix)[0]

        package_info = PuppetPackageInfo(
            name=module_name,
            filename=archive_name,
            url=artifact["url"],
            version=version,
            last_modified=iso8601.parse_date(artifact["last_update"]),
        )
        yield release_name(version), package_info

    def build_release(
        self, p_info: PuppetPackageInfo, uncompressed_path: str, directory: Sha1Git
    ) -> Optional[Release]:
        """Build the synthetic release of one module version.

        The author and the description come from the metadata.json file found
        in the extracted archive, the date from ``p_info.last_modified``.

        Args:
            p_info: Package information of the version being loaded
            uncompressed_path: Directory the archive was extracted to
            directory: Identifier of the directory the release targets

        Returns:
            The release object
        """
        # The module directory inside the archive is named after the archive
        # file, minus its ".tar.gz" extension
        module_dirname = p_info.filename.partition(".tar.gz")[0]
        metadata = extract_intrinsic_metadata(Path(uncompressed_path) / module_dirname)

        version: str = metadata["version"]
        assert version == p_info.version

        description = metadata["summary"]
        author = Person.from_fullname(metadata["author"].encode())

        headline = (
            f"Synthetic release for Puppet source package {p_info.name} "
            f"version {version}"
        )
        message = f"{headline}\n\n{description}\n"

        return Release(
            name=version.encode(),
            author=author,
            date=TimestampWithTimezone.from_datetime(p_info.last_modified),
            message=message.encode(),
            target_type=ObjectType.DIRECTORY,
            target=directory,
            synthetic=True,
        )
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from celery import shared_task
from swh.loader.package.puppet.loader import PuppetLoader
@shared_task(name=f"{__name__}.LoadPuppet")
def load_puppet(**kwargs):
    """Celery task building a ``PuppetLoader`` from the config file and running it.

    Args:
        **kwargs: Forwarded as-is to ``PuppetLoader.from_configfile``

    Returns:
        Whatever ``PuppetLoader.load()`` returns
    """
    loader = PuppetLoader.from_configfile(**kwargs)
    return loader.load()
#!/usr/bin/env bash

# Generate the fake Puppet module archives (.tar.gz).

set -euo pipefail

# TMP is the scratch tree the archive content is assembled in,
# BASE_PATH the directory the finished archives are moved to
readonly TMP="tmp_dir/puppet"
readonly BASE_PATH="https_forgeapi.puppet.com"

# A Puppet module tar.gz archive needs at least one directory with a
# metadata.json file: create one directory per fake module version
mkdir -p "${TMP}" "${TMP}/saz-memcached-1.0.0" "${TMP}/saz-memcached-8.1.0"
mkdir -p "${BASE_PATH}"
# Write the metadata.json file of the fake saz-memcached 1.0.0 module
echo -e '''{
"summary": "UNKNOWN",
"author": "saz",
"source": "UNKNOWN",
"dependencies": [
],
"types": [
],
"license": "Apache License, Version 2.0",
"project_page": "https://github.com/saz/puppet-memcached",
"version": "1.0.0",
"name": "saz-memcached",
"checksums": {
"spec/spec_helper.rb": "ca19ec4f451ebc7fdb035b52eae6e909",
"manifests/params.pp": "0b8904086e7fa6f0d1f667d547a17d96",
"README.md": "fa0b9f6d97f2763e565d8a330fb3930b",
"manifests/config.pp": "706f7c5001fb6014575909a335a52def",
"templates/memcached.conf.erb": "8151e00d922bb9ebb1a24a05ac0969d7",
"manifests/service.pp": "a528751401189c299a38cab12d52431f",
"tests/init.pp": "e798f4999ba392f3c0fce0d5290c263f",
"manifests/install.pp": "11a9e9a99a7bc1c7b2511ce7e79c9fb4",
"spec/spec.opts": "a600ded995d948e393fbe2320ba8e51c",
"metadata.json": "d34d0b70aba36510fbc2df4e667479ef",
"manifests/init.pp": "c5166a8a88b544ded705efac21494bc1",
"Modulefile": "7f512991a7d2ad99ffb28ac6e7419f9e"
},
"description": "Manage memcached via Puppet"
}
''' > ${TMP}/saz-memcached-1.0.0/metadata.json
# Write the metadata.json file of the fake saz-memcached 8.1.0 module
echo -e '''{
"name": "saz-memcached",
"version": "8.1.0",
"author": "saz",
"summary": "Manage memcached via Puppet",
"license": "Apache-2.0",
"source": "git://github.com/saz/puppet-memcached.git",
"project_page": "https://github.com/saz/puppet-memcached",
"issues_url": "https://github.com/saz/puppet-memcached/issues",
"description": "Manage memcached via Puppet",
"requirements": [
{"name":"puppet","version_requirement":">= 6.1.0 < 8.0.0" }
],
"dependencies": [
{"name":"puppetlabs/stdlib","version_requirement":">= 4.13.1 < 9.0.0"},
{"name":"puppetlabs/firewall","version_requirement":">= 0.1.0 < 4.0.0"},
{"name":"puppet/systemd","version_requirement":">= 2.10.0 < 4.0.0"},
{"name":"puppet/selinux","version_requirement":">= 3.2.0 < 4.0.0"}
],
"operatingsystem_support": [
{
"operatingsystem": "RedHat",
"operatingsystemrelease": [
"7",
"8",
"9"
]
},
{
"operatingsystem": "CentOS",
"operatingsystemrelease": [
"7",
"8",
"9"
]
},
{
"operatingsystem": "OracleLinux",
"operatingsystemrelease": [
"7"
]
},
{
"operatingsystem": "Scientific",
"operatingsystemrelease": [
"7"
]
},
{
"operatingsystem": "Debian",
"operatingsystemrelease": [
"9",
"10",
"11"
]
},
{
"operatingsystem": "Ubuntu",
"operatingsystemrelease": [
"18.04",
"20.04",
"22.04"
]
},
{
"operatingsystem": "Windows"
},
{
"operatingsystem": "FreeBSD"
}
]
}
''' > ${TMP}/saz-memcached-8.1.0/metadata.json
cd "${TMP}"

# Compress each module directory into its v3_files_<module>.tar.gz archive
for module_version in saz-memcached-1.0.0 saz-memcached-8.1.0; do
    tar -czf "v3_files_${module_version}.tar.gz" "${module_version}"
done

# Move the archives to the directory they are served from
mv *.tar.gz "../../${BASE_PATH}"

# Step back out of the scratch tree, then delete it
cd ../../
rm -r tmp_dir/
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.loader.package.puppet.loader import PuppetLoader
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
ObjectType,
Person,
Release,
Snapshot,
SnapshotBranch,
TargetType,
TimestampWithTimezone,
)
# Origin url and per-version artifacts passed to PuppetLoader by the tests of
# this module. Each artifact entry carries the archive url, the version, the
# archive file name and the last update date of that version.
ORIGINS = {
    "url": "https://forge.puppet.com/modules/saz/memcached",
    "artifacts": {
        "1.0.0": {
            "url": "https://forgeapi.puppet.com/v3/files/saz-memcached-1.0.0.tar.gz",  # noqa: B950
            "version": "1.0.0",
            "filename": "saz-memcached-1.0.0.tar.gz",
            "last_update": "2011-11-20T13:40:30-08:00",
        },
        "8.1.0": {
            "url": "https://forgeapi.puppet.com/v3/files/saz-memcached-8.1.0.tar.gz",  # noqa: B950
            "version": "8.1.0",
            "filename": "saz-memcached-8.1.0.tar.gz",
            "last_update": "2022-07-11T03:34:55-07:00",
        },
    },
}
def test_get_versions(requests_mock_datadir, swh_storage):
    """The loader lists the versions of the artifacts, lowest version first."""
    origin_url = ORIGINS["url"]
    artifacts = ORIGINS["artifacts"]
    puppet_loader = PuppetLoader(swh_storage, url=origin_url, artifacts=artifacts)

    versions = puppet_loader.get_versions()

    assert versions == ["1.0.0", "8.1.0"]
def test_get_default_version(requests_mock_datadir, swh_storage):
    """The default version is the highest version among the artifacts."""
    origin_url = ORIGINS["url"]
    artifacts = ORIGINS["artifacts"]
    puppet_loader = PuppetLoader(swh_storage, url=origin_url, artifacts=artifacts)

    default_version = puppet_loader.get_default_version()

    assert default_version == "8.1.0"
def test_puppet_loader_load_multiple_version(
    datadir, requests_mock_datadir, swh_storage
):
    """Load both versions of saz-memcached and check what ends up in storage."""
    puppet_loader = PuppetLoader(
        swh_storage, url=ORIGINS["url"], artifacts=ORIGINS["artifacts"]
    )
    status = puppet_loader.load()

    # Outcome of the visit
    assert status["status"] == "eventful"
    snapshot_id = status["snapshot_id"]
    assert snapshot_id is not None
    assert "9a8e76a8a6eae5285059d9f6d5083a99317727cf" == snapshot_id

    # Snapshot: one release branch per version, HEAD aliasing the newest one
    release_1_0_0_id = hash_to_bytes("50eb560bb5322cd149359b9cc8debc78834bcfad")
    release_8_1_0_id = hash_to_bytes("2f5722136d775dd48fe85fabdd274f1e2d7fcf22")
    branches = {
        b"HEAD": SnapshotBranch(
            target=b"releases/8.1.0",
            target_type=TargetType.ALIAS,
        ),
        b"releases/1.0.0": SnapshotBranch(
            target=release_1_0_0_id,
            target_type=TargetType.RELEASE,
        ),
        b"releases/8.1.0": SnapshotBranch(
            target=release_8_1_0_id,
            target_type=TargetType.RELEASE,
        ),
    }
    expected_snapshot = Snapshot(id=hash_to_bytes(snapshot_id), branches=branches)
    check_snapshot(expected_snapshot, swh_storage)

    # Object counts, written as "version 1.0.0 + version 8.1.0" where relevant
    expected_stats = {
        "content": 1 + 1,
        "directory": 2 + 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 1 + 1,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert get_stats(swh_storage) == expected_stats

    # Full content of the release built for version 8.1.0
    stored_release = swh_storage.release_get([release_8_1_0_id])[0]
    expected_release = Release(
        name=b"8.1.0",
        message=b"Synthetic release for Puppet source package saz-memcached version 8.1.0\n\n"
        b"Manage memcached via Puppet\n",
        target=hash_to_bytes("1b9a2dbc80f954e1ba4b2f1c6344d1ce4e84ab7c"),
        target_type=ObjectType.DIRECTORY,
        synthetic=True,
        author=Person(fullname=b"saz", name=b"saz", email=None),
        date=TimestampWithTimezone.from_iso8601("2022-07-11T03:34:55-07:00"),
        id=release_8_1_0_id,
    )
    assert stored_release == expected_release

    # The visit itself is recorded as a full "puppet" visit of the origin
    assert_last_visit_matches(
        swh_storage,
        url=ORIGINS["url"],
        status="full",
        type="puppet",
        snapshot=expected_snapshot.id,
    )
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def test_tasks_puppet_loader(
    mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
):
    """The LoadPuppet task calls the loader and returns its load status."""
    # Replace the actual loading with a canned status
    patched_load = mocker.patch(
        "swh.loader.package.puppet.loader.PuppetLoader.load",
        return_value={"status": "eventful"},
    )

    artifacts = {
        "1.0.0": {
            "url": "https://domain/some-package-1.0.0.tar.gz",
            "version": "1.0.0",
            "filename": "some-module-1.0.0.tar.gz",
            "last_update": "2011-11-20T13:40:30-08:00",
        },
    }
    task_result = swh_scheduler_celery_app.send_task(
        "swh.loader.package.puppet.tasks.LoadPuppet",
        kwargs={
            "url": "some-url/api/packages/some-package",
            "artifacts": artifacts,
        },
    )
    assert task_result

    task_result.wait()
    assert task_result.successful()
    assert patched_load.called
    assert task_result.result == {"status": "eventful"}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment