Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • anlambert/swh-vault
  • lunar/swh-vault
  • swh/devel/swh-vault
  • douardda/swh-vault
  • olasd/swh-vault
  • marmoute/swh-vault
  • rboyer/swh-vault
7 results
Show changes
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
from swh.vault import get_vault
from swh.vault.api.client import RemoteVaultClient
from swh.vault.backend import VaultBackend
# Parametrization matrix for the get_vault() tests below: each entry is a
# (configuration class name, expected concrete backend class, constructor
# kwargs) triple. The "local" entry carries the full backend configuration
# (db, cache, storage, scheduler) required by VaultBackend.
SERVER_IMPLEMENTATIONS = [
    ("remote", RemoteVaultClient, {"url": "localhost"}),
    (
        "local",
        VaultBackend,
        {
            "db": "something",
            "cache": {"cls": "memory", "args": {}},
            "storage": {"cls": "remote", "url": "mock://storage-url"},
            "scheduler": {"cls": "remote", "url": "mock://scheduler-url"},
        },
    ),
]
@pytest.fixture
def mock_psycopg2(mocker):
    """Stub out psycopg2 pooling and extras in swh.vault.backend so that a
    VaultBackend can be instantiated without a real PostgreSQL server."""
    for patch_target in (
        "swh.vault.backend.psycopg2.pool",
        "swh.vault.backend.psycopg2.extras",
    ):
        mocker.patch(patch_target)
def test_init_get_vault_failure():
    """Requesting an unknown vault class name must raise ValueError."""
    expected_message = "Unknown Vault class"
    with pytest.raises(ValueError, match=expected_message):
        get_vault("unknown-vault-storage")
@pytest.mark.parametrize("class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS)
def test_init_get_vault(class_name, expected_class, kwargs, mock_psycopg2):
    """get_vault() builds the concrete backend matching the given class name."""
    vault_instance = get_vault(class_name, **kwargs)
    assert isinstance(vault_instance, expected_class)
@pytest.mark.parametrize("class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS)
def test_init_get_vault_deprecation_warning(
    class_name, expected_class, kwargs, mock_psycopg2
):
    """Passing the legacy ``args`` dict still instantiates the backend, but
    emits a DeprecationWarning."""
    with pytest.warns(DeprecationWarning):
        vault_instance = get_vault(class_name, args=kwargs)
    assert isinstance(vault_instance, expected_class)
def test_init_get_vault_ok(swh_vault_config):
    """The shared test configuration fixture yields a local VaultBackend."""
    vault_instance = get_vault("local", **swh_vault_config)
    assert isinstance(vault_instance, VaultBackend)
# Copyright (C) 2017-2020 The Software Heritage developers
# Copyright (C) 2017-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -18,10 +18,8 @@ def swh_cooker_config():
return {
"vault": {
"cls": "remote",
"args": {
"url": "mock://vault-backend",
"storage": {"cls": "remote", "url": "mock://storage-url"},
},
"url": "mock://vault-backend",
"storage": {"cls": "remote", "url": "mock://storage-url"},
}
}
......@@ -54,12 +52,12 @@ def test_write_to_env(swh_cooker_config, tmp_path, monkeypatch):
[
({}, ValueError, "missing 'vault' configuration"),
(
{"vault": {"cls": "local"}},
{"vault": {"cls": "postgresql"}},
EnvironmentError,
"This vault backend can only be a 'remote' configuration",
),
(
{"vault": {"cls": "remote", "args": {"missing-storage-key": ""}}},
{"vault": {"cls": "remote", "missing-storage-key": ""}},
ValueError,
"invalid configuration: missing 'storage' config entry",
),
......@@ -81,18 +79,14 @@ def test_get_cooker_config_ko(
{
"vault": {
"cls": "remote",
"args": {
"url": "mock://vault-backend",
"storage": {"cls": "remote", "url": "mock://storage-url"},
},
"url": "mock://vault-backend",
"storage": {"cls": "remote", "url": "mock://storage-url"},
}
},
{
"vault": {
"cls": "remote",
"args": {
"url": "mock://vault-backend",
},
"url": "mock://vault-backend",
},
"storage": {"cls": "remote", "url": "mock://storage-url"},
},
......@@ -102,11 +96,26 @@ def test_get_cooker_config_ko(
"url": "mock://vault-backend",
},
"storage": {"cls": "remote", "url": "mock://storage-url"},
"objstorage": {"cls": "memory"},
},
{
"vault": {
"cls": "remote",
"url": "mock://vault-backend",
},
"storage": {"cls": "remote", "url": "mock://storage-url"},
"graph": {"url": "mock://graph-url"},
},
],
)
def test_get_cooker_nominal(config_ok, tmp_path, monkeypatch):
def test_get_cooker_nominal(config_ok, tmp_path, monkeypatch, requests_mock):
"""Correct configuration should allow the instantiation of the cookers"""
requests_mock.get(
"mock://graph-url/stats",
json={"num_nodes": 42},
headers={"Content-Type": "application/json"},
)
for cooker_type in COOKER_TYPES.keys():
write_config_to_env(config_ok, tmp_path, monkeypatch)
......@@ -114,3 +123,11 @@ def test_get_cooker_nominal(config_ok, tmp_path, monkeypatch):
assert cooker is not None
assert isinstance(cooker, tuple(COOKER_TYPES[cooker_type]))
if config_ok.get("objstorage") or config_ok["vault"].get("objstorage"):
assert cooker.objstorage is not None
else:
assert cooker.objstorage is None
if config_ok.get("graph") or config_ok["vault"].get("graph"):
assert cooker.graph is not None
else:
assert cooker.graph is None
# Copyright (C) 2020-2022 The Software Heritage developers
# Copyright (C) 2020-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -38,12 +38,14 @@ def swh_vault_server_config_file(swh_vault_server_config, monkeypatch, tmp_path)
return conf_path
def test_make_app_from_file_missing():
def test_make_app_from_file_missing(monkeypatch):
monkeypatch.delenv("SWH_CONFIG_FILENAME", raising=False)
with pytest.raises(ValueError, match="Missing configuration path."):
make_app_from_configfile()
def test_make_app_from_file_does_not_exist(tmp_path):
def test_make_app_from_file_does_not_exist(tmp_path, monkeypatch):
monkeypatch.delenv("SWH_CONFIG_FILENAME", raising=False)
conf_path = os.path.join(str(tmp_path), "vault-server.yml")
assert os.path.exists(conf_path) is False
......@@ -64,8 +66,9 @@ def test_make_app_from_env_variable(swh_vault_server_config_file):
swh.vault.api.server.vault = None
def test_make_app_from_file(swh_vault_server_config, tmp_path):
def test_make_app_from_file(swh_vault_server_config, tmp_path, monkeypatch):
"""Server initialization happens through path if provided"""
monkeypatch.delenv("SWH_CONFIG_FILENAME", raising=False)
conf_path = os.path.join(str(tmp_path), "vault-server.yml")
with open(conf_path, "w") as f:
f.write(yaml.dump(swh_vault_server_config))
......@@ -160,17 +163,15 @@ def test_check_config_missing_vault_configuration() -> None:
def test_check_config_not_local() -> None:
"""Wrong configuration raises"""
expected_error = (
"The vault backend can only be started with a 'postgresql' configuration"
"The vault backend of a vault server cannot be a 'remote' configuration"
)
with pytest.raises(EnvironmentError, match=expected_error):
check_config({"vault": {"cls": "remote"}})
@pytest.mark.parametrize("clazz", ["local", "postgresql"])
def test_check_config_ok(swh_vault_server_config, clazz) -> None:
def test_check_config_ok(swh_vault_server_config) -> None:
"""Check that the default config is accepted"""
config = swh_vault_server_config.copy()
config["vault"]["cls"] = clazz
assert swh_vault_server_config["vault"]["cls"] == "postgresql"
assert check_config(swh_vault_server_config) is not None
......
# Copyright (C) 2020-2022 The Software Heritage developers
# Copyright (C) 2020-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -7,7 +7,7 @@ import pytest
from swh.model.from_disk import DentryPerms
from swh.model.model import Content, Directory, DirectoryEntry, SkippedContent
from swh.vault.to_disk import DirectoryBuilder, get_filtered_files_content
from swh.vault.to_disk import DirectoryBuilder, get_filtered_file_content
def test_get_filtered_files_content(swh_storage):
......@@ -37,7 +37,9 @@ def test_get_filtered_files_content(swh_storage):
},
]
res = list(get_filtered_files_content(swh_storage, files_data))
res = [
get_filtered_file_content(swh_storage, file_data) for file_data in files_data
]
assert res == [
{
......@@ -76,7 +78,7 @@ def test_get_filtered_files_content__unknown_status(swh_storage):
]
with pytest.raises(AssertionError, match="unexpected status 'blah'"):
list(get_filtered_files_content(swh_storage, files_data))
[get_filtered_file_content(swh_storage, file_data) for file_data in files_data]
def _fill_storage(swh_storage, exclude_cnt3=False, exclude_dir1=False):
......@@ -127,11 +129,19 @@ def _fill_storage(swh_storage, exclude_cnt3=False, exclude_dir1=False):
return dir2
def test_directory_builder(swh_storage, tmp_path):
@pytest.mark.parametrize(
"use_objstorage", [False, True], ids=["use only storage", "use objstorage"]
)
def test_directory_builder(swh_storage, tmp_path, use_objstorage):
dir2 = _fill_storage(swh_storage)
root = tmp_path / "root"
builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id)
builder = DirectoryBuilder(
storage=swh_storage,
root=bytes(root),
dir_id=dir2.id,
objstorage=swh_storage.objstorage if use_objstorage else None,
)
assert not root.exists()
......@@ -150,11 +160,19 @@ def test_directory_builder(swh_storage, tmp_path):
assert (root / "content3").open().read() == "baz qux"
def test_directory_builder_missing_content(swh_storage, tmp_path):
@pytest.mark.parametrize(
"use_objstorage", [False, True], ids=["use only storage", "use objstorage"]
)
def test_directory_builder_missing_content(swh_storage, tmp_path, use_objstorage):
dir2 = _fill_storage(swh_storage, exclude_cnt3=True)
root = tmp_path / "root"
builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id)
builder = DirectoryBuilder(
storage=swh_storage,
root=bytes(root),
dir_id=dir2.id,
objstorage=swh_storage.objstorage if use_objstorage else None,
)
assert not root.exists()
......@@ -165,11 +183,19 @@ def test_directory_builder_missing_content(swh_storage, tmp_path):
assert "This content is missing" in (root / "content3").open().read()
def test_directory_builder_missing_directory(swh_storage, tmp_path):
@pytest.mark.parametrize(
"use_objstorage", [False, True], ids=["use only storage", "use objstorage"]
)
def test_directory_builder_missing_directory(swh_storage, tmp_path, use_objstorage):
dir2 = _fill_storage(swh_storage, exclude_dir1=True)
root = tmp_path / "root"
builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id)
builder = DirectoryBuilder(
storage=swh_storage,
root=bytes(root),
dir_id=dir2.id,
objstorage=swh_storage.objstorage if use_objstorage else None,
)
assert not root.exists()
......
# Copyright (C) 2016-2020 The Software Heritage developers
# Copyright (C) 2016-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import collections
import functools
import concurrent
import os
from typing import Any, Dict, Iterator, List
from typing import Any, Dict, Optional
from swh.model import hashutil
from swh.model.from_disk import DentryPerms, mode_to_perms
from swh.storage.algos.dir_iterators import dir_iterator
from swh.objstorage.interface import ObjStorageInterface, objid_from_dict
from swh.storage.interface import StorageInterface
MISSING_MESSAGE = (
......@@ -26,18 +26,20 @@ SKIPPED_MESSAGE = (
HIDDEN_MESSAGE = b"This content is hidden."
def get_filtered_files_content(
storage: StorageInterface, files_data: List[Dict]
) -> Iterator[Dict[str, Any]]:
"""Retrieve the files specified by files_data and apply filters for skipped
and missing contents.
def get_filtered_file_content(
storage: StorageInterface,
file_data: Dict[str, Any],
objstorage: Optional[ObjStorageInterface] = None,
) -> Dict[str, Any]:
"""Retrieve the file specified by file_data and apply filters for skipped
and missing content.
Args:
storage: the storage from which to retrieve the objects
files_data: list of file entries as returned by directory_ls()
file_data: a file entry as returned by directory_ls()
Yields:
The entries given in files_data with a new 'content' key that points to
Returns:
The entry given in file_data with a new 'content' key that points to
the file content in bytes.
The contents can be replaced by a specific message to indicate that
......@@ -45,40 +47,44 @@ def get_filtered_files_content(
their sizes were too big for us to archive it).
"""
for file_data in files_data:
status = file_data["status"]
if status == "visible":
sha1 = file_data["sha1"]
data = storage.content_get_data(sha1)
if data is None:
content = SKIPPED_MESSAGE
else:
content = data
elif status == "absent":
status = file_data["status"]
if status == "visible":
hashes = objid_from_dict(file_data)
data: Optional[bytes]
if objstorage is not None:
data = objstorage.get(hashes)
else:
data = storage.content_get_data(hashes)
if data is None:
content = SKIPPED_MESSAGE
elif status == "hidden":
content = HIDDEN_MESSAGE
elif status is None:
content = MISSING_MESSAGE
else:
assert False, (
f"unexpected status {status!r} "
f"for content {hashutil.hash_to_hex(file_data['target'])}"
)
yield {"content": content, **file_data}
def apply_chunked(func, input_list, chunk_size):
"""Apply func on input_list divided in chunks of size chunk_size"""
for i in range(0, len(input_list), chunk_size):
yield from func(input_list[i : i + chunk_size])
content = data
elif status == "absent":
content = SKIPPED_MESSAGE
elif status == "hidden":
content = HIDDEN_MESSAGE
elif status is None:
content = MISSING_MESSAGE
else:
assert False, (
f"unexpected status {status!r} "
f"for content {hashutil.hash_to_hex(file_data['target'])}"
)
return {"content": content, **file_data}
class DirectoryBuilder:
"""Reconstructs the on-disk representation of a directory in the storage."""
def __init__(self, storage: StorageInterface, root: bytes, dir_id: bytes):
def __init__(
self,
storage: StorageInterface,
root: bytes,
dir_id: bytes,
thread_pool_size: int = 10,
objstorage: Optional[ObjStorageInterface] = None,
):
"""Initialize the directory builder.
Args:
......@@ -89,49 +95,53 @@ class DirectoryBuilder:
self.storage = storage
self.root = root
self.dir_id = dir_id
self.thread_pool_size = thread_pool_size
self.objstorage = objstorage
def build(self) -> None:
"""Perform the reconstruction of the directory in the given root."""
# Retrieve data from the database.
# Split into files, revisions and directory data.
entries = collections.defaultdict(list)
for entry in dir_iterator(self.storage, self.dir_id):
entries[entry["type"]].append(entry)
# Recreate the directory's subtree and then the files into it.
self._create_tree(entries["dir"])
self._create_files(entries["file"])
self._create_revisions(entries["rev"])
def _create_tree(self, directories: List[Dict[str, Any]]) -> None:
"""Create a directory tree from the given paths
The tree is created from `root` and each given directory in
`directories` will be created.
"""
# Directories are sorted by depth so they are created in the
# right order
bsep = os.path.sep.encode()
directories = sorted(directories, key=lambda x: len(x["path"].split(bsep)))
for dir in directories:
os.makedirs(os.path.join(self.root, dir["path"]))
def _create_files(self, files_data: List[Dict[str, Any]]) -> None:
"""Create the files in the tree and fetch their contents."""
f = functools.partial(get_filtered_files_content, self.storage)
files_data = apply_chunked(f, files_data, 1000)
for file_data in files_data:
def file_fetcher(file_data: Dict[str, Any]) -> None:
file_data = get_filtered_file_content(
self.storage, file_data, self.objstorage
)
path = os.path.join(self.root, file_data["path"])
self._create_file(path, file_data["content"], file_data["perms"])
def _create_revisions(self, revs_data: List[Dict[str, Any]]) -> None:
"""Create the revisions in the tree as broken symlinks to the target
executor = concurrent.futures.ThreadPoolExecutor(self.thread_pool_size)
futures = []
os.makedirs(self.root, exist_ok=True)
queue = collections.deque([(b"", self.dir_id)])
while queue:
path, dir_id = queue.popleft()
dir_entries = self.storage.directory_ls(dir_id)
for dir_entry in dir_entries:
dir_entry["path"] = os.path.join(path, dir_entry["name"])
match dir_entry["type"]:
case "dir":
self._create_tree(dir_entry)
queue.append((dir_entry["path"], dir_entry["target"]))
case "rev":
self._create_revision(dir_entry)
case "file":
futures.append(executor.submit(file_fetcher, dir_entry))
case _:
raise ValueError(
f"Unsupported directory entry type {dir_entry['type']} for "
f"{dir_entry['name']:r} in directory swh:1:dir:{dir_id.hex()}"
)
concurrent.futures.wait(futures)
def _create_tree(self, directory: Dict[str, Any]) -> None:
"""Create a directory tree from root for the given path."""
os.makedirs(os.path.join(self.root, directory["path"]), exist_ok=True)
def _create_revision(self, rev_data: Dict[str, Any]) -> None:
"""Create the revision in the tree as a broken symlink to the target
identifier."""
for file_data in revs_data:
path = os.path.join(self.root, file_data["path"])
target = hashutil.hash_to_bytehex(file_data["target"])
self._create_file(path, target, mode=DentryPerms.symlink)
os.makedirs(os.path.join(self.root, rev_data["path"]), exist_ok=True)
def _create_file(
self, path: bytes, content: bytes, mode: int = DentryPerms.content
......
[tox]
envlist=black,flake8,mypy,py3
minversion = 4
envlist =
black
flake8
mypy
py3
[testenv]
usedevelop = true
extras =
testing
graph
deps =
pytest-cov
commands =
pytest --cov={envsitepackagesdir}/swh/vault \
{envsitepackagesdir}/swh/vault \
--cov-branch {posargs}
pytest --doctest-modules \
--cov=swh/vault \
--cov-branch \
swh/vault \
{posargs}
[testenv:black]
skip_install = true
deps =
black==22.10.0
black==25.1.0
commands =
{envpython} -m black --check swh
[testenv:flake8]
skip_install = true
deps =
flake8==5.0.4
flake8-bugbear==22.9.23
pycodestyle==2.9.1
flake8==7.1.1
flake8-bugbear==24.12.12
flake8-pyproject==1.2.3
pycodestyle==2.12.1
commands =
{envpython} -m flake8
......@@ -33,7 +43,7 @@ extras =
testing
graph
deps =
mypy==0.942
mypy==1.15.0
commands =
mypy swh
......@@ -41,38 +51,15 @@ commands =
# git HEAD of swh-docs, is executed on CI for each diff to prevent
# breaking doc build
[testenv:sphinx]
whitelist_externals = make
usedevelop = true
extras =
testing
graph
deps =
# fetch and install swh-docs in develop mode
-e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
SPHINXOPTS = -W
commands =
make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs
# build documentation only inside swh-environment using local state
# of swh-docs package
[testenv:sphinx-dev]
whitelist_externals = make
usedevelop = true
allowlist_externals = make
extras =
testing
graph
deps =
# install swh-docs in develop mode
-e ../swh-docs
# fetch and install swh-docs
git+https://gitlab.softwareheritage.org/swh/devel/swh-docs.git\#egg=swh.docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
SPHINXOPTS = -W
commands =
make -I ../.tox/sphinx-dev/src/swh-docs/swh/ -C docs
make -I {env_dir}/share/swh-docs -C docs