From df2cc485970945c1143732d73ccae6d532cf0a3b Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Thu, 27 Feb 2025 13:47:02 +0100
Subject: [PATCH 01/25] winery: remove Stats object

The stats are only used by the benchmark process, and there are no
plans to use them again.
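
For reference, the removed Stats object appended one CSV file per
process, with the header shown below (the data row is purely
illustrative):

    time,object_write_count,bytes_write,object_read_count,bytes_read
    1740000000,100,1048576,100,1048576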
---
 swh/objstorage/backends/winery/objstorage.py  |  8 --
 swh/objstorage/backends/winery/stats.py       | 94 -------------------
 swh/objstorage/cli.py                         |  2 -
 .../tests/test_objstorage_winery.py           | 19 ----
 swh/objstorage/tests/winery_benchmark.py      |  6 --
 5 files changed, 129 deletions(-)
 delete mode 100644 swh/objstorage/backends/winery/stats.py

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 6e02610..1aab1c4 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -23,7 +23,6 @@ from .roshard import (
 from .rwshard import RWShard
 from .sharedbase import ShardState, SharedBase
 from .sleep import sleep_exponential
-from .stats import Stats
 
 logger = logging.getLogger(__name__)
 
@@ -149,7 +148,6 @@ class WineryReader(WineryBase):
 
 
 def pack(shard, shared_base=None, clean_immediately=False, **kwargs) -> bool:
-    stats = Stats(kwargs.get("output_dir"))
     rw = RWShard(shard, **kwargs)
 
     count = rw.count()
@@ -158,9 +156,6 @@ def pack(shard, shared_base=None, clean_immediately=False, **kwargs) -> bool:
         logger.info("Created RO shard %s", shard)
         for i, (obj_id, content) in enumerate(rw.all()):
             ro.add(content, obj_id)
-            if stats.stats_active:
-                stats.stats_read(obj_id, content)
-                stats.stats_write(obj_id, content)
             if i % 100 == 99:
                 logger.debug("RO shard %s: added %s/%s objects", shard, i + 1, count)
 
@@ -344,7 +339,6 @@ def shard_packer(
         max_duration=60,
         message="Waiting for RBD image mapping",
     ),
-    output_dir: Optional[str] = None,
     stop_packing: Callable[[int], bool] = never_stop,
     wait_for_shard: Callable[[int], None] = sleep_exponential(
         min_duration=5,
@@ -367,7 +361,6 @@ def shard_packer(
       rbd_wait_for_image: sleep function called to wait for an image (when
        `rbd_create_images`=`False`)
       rbd_*: passed directly to :class:`roshard.Pool`
-      output_dir: output directory for statistics
       stop_packing: callback to determine whether the packer should exit
       wait_for_shard: sleep function called when no shards are available to be packed
     """
@@ -395,7 +388,6 @@ def shard_packer(
                 locked.name,
                 base_dsn=base_dsn,
                 shard_max_size=shard_max_size,
-                output_dir=output_dir,
                 shared_base=base,
                 throttle_read=throttle_read,
                 throttle_write=throttle_write,
diff --git a/swh/objstorage/backends/winery/stats.py b/swh/objstorage/backends/winery/stats.py
deleted file mode 100644
index 6dded97..0000000
--- a/swh/objstorage/backends/winery/stats.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# Copyright (C) 2022-2024  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import collections
-import logging
-import os
-import time
-
-logger = logging.getLogger(__name__)
-
-
-class Stats:
-    def __init__(self, d):
-        if d is None:
-            self._stats_active = False
-            return
-
-        self._stats_active = True
-        if not os.path.exists(d):
-            try:
-                os.makedirs(d)
-            except FileExistsError:
-                # This exception can happen, even when os.makedirs(d,
-                # exist_ok=True) is used, when two concurrent processes try to
-                # create the directory at the same time: one of the two
-                # processes will receive the exception.
-                pass
-        self._stats = collections.Counter()
-        self._stats_filename = f"{d}/{os.getpid()}.csv"
-        self._stats_fd = open(self.stats_filename, "a")
-        if self._stats_fd.tell() == 0:
-            # We created the file, write the CSV header
-            self._stats_fd.write(
-                # time in seconds since epoch
-                "time,"
-                # total number of objects written at this point in time
-                "object_write_count,"
-                # total number of bytes written at this point in time
-                "bytes_write,"
-                # total number of objects read at this point in time
-                "object_read_count,"
-                # total number of bytes read at this point in time
-                "bytes_read"
-                "\n"
-            )
-        else:
-            # The PID was recycled, print an empty stats line at the end of the file to mark it
-            self._stats_print()
-
-        self._stats_last_write = time.monotonic()
-        self._stats_flush_interval = 5
-
-    @property
-    def stats_active(self):
-        return self._stats_active
-
-    @property
-    def stats_filename(self):
-        return self._stats_filename
-
-    def __del__(self):
-        if self.stats_active and not self._stats_fd.closed:
-            self._stats_print()
-            self._stats_fd.close()
-
-    def _stats_print(self):
-        ll = ",".join(
-            str(self._stats[x])
-            for x in [
-                "object_write_count",
-                "bytes_write",
-                "object_read_count",
-                "bytes_read",
-            ]
-        )
-        self._stats_fd.write(f"{int(time.time())},{ll}\n")
-
-    def _stats_maybe_print(self):
-        now = time.monotonic()
-        if now - self._stats_last_write > self._stats_flush_interval:
-            self._stats_print()
-            self._stats_last_write = now
-
-    def stats_read(self, key, content):
-        self._stats["object_read_count"] += 1
-        self._stats["bytes_read"] += len(key) + len(content)
-        self._stats_maybe_print()
-
-    def stats_write(self, key, content):
-        self._stats["object_write_count"] += 1
-        self._stats["bytes_write"] += len(key) + len(content)
-        self._stats_maybe_print()
diff --git a/swh/objstorage/cli.py b/swh/objstorage/cli.py
index 85b3f75..7e0f7da 100644
--- a/swh/objstorage/cli.py
+++ b/swh/objstorage/cli.py
@@ -134,7 +134,6 @@ def winery_packer(ctx, stop_after_shards: Optional[int] = None):
     shard_max_size = config["shard_max_size"]
     throttle_read = config.get("throttle_read", 200 * 1024 * 1024)
     throttle_write = config.get("throttle_write", 200 * 1024 * 1024)
-    output_dir = config.get("output_dir")
     rbd_pool_name = config.get("rbd_pool_name", "shards")
     rbd_data_pool_name = config.get("rbd_data_pool_name")
     rbd_use_sudo = config.get("rbd_use_sudo", True)
@@ -158,7 +157,6 @@ def winery_packer(ctx, stop_after_shards: Optional[int] = None):
         rbd_use_sudo=rbd_use_sudo,
         rbd_map_options=rbd_map_options,
         rbd_create_images=rbd_create_images,
-        output_dir=output_dir,
         stop_packing=stop_packing,
     )
 
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index ddd6f30..c2e737e 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -32,7 +32,6 @@ from swh.objstorage.backends.winery.objstorage import (
 )
 from swh.objstorage.backends.winery.sharedbase import ShardState, SharedBase
 from swh.objstorage.backends.winery.sleep import sleep_exponential
-from swh.objstorage.backends.winery.stats import Stats
 from swh.objstorage.backends.winery.throttler import (
     BandwidthCalculator,
     IOThrottler,
@@ -1489,24 +1488,6 @@ def test_winery_throttler(postgresql_dsn):
     assert t.throttle_get(reader, key) == content
 
 
-def test_winery_stats(tmpdir):
-    s = Stats(None)
-    assert s.stats_active is False
-    s = Stats(tmpdir / "stats")
-    assert s.stats_active is True
-    assert os.path.exists(s.stats_filename)
-    size = os.path.getsize(s.stats_filename)
-    s._stats_flush_interval = 0
-    k = "KEY"
-    v = "CONTENT"
-    s.stats_read(k, v)
-    s.stats_write(k, v)
-    s.stats_read(k, v)
-    s.stats_write(k, v)
-    s.__del__()
-    assert os.path.getsize(s.stats_filename) > size
-
-
 class TestWineryObjStorage(ObjStorageTestFixture):
     @pytest.fixture(autouse=True)
     def objstorage(self, file_backed_pool, storage):
diff --git a/swh/objstorage/tests/winery_benchmark.py b/swh/objstorage/tests/winery_benchmark.py
index acde695..3e8dc13 100644
--- a/swh/objstorage/tests/winery_benchmark.py
+++ b/swh/objstorage/tests/winery_benchmark.py
@@ -27,7 +27,6 @@ from swh.objstorage.backends.winery.objstorage import (
 )
 from swh.objstorage.backends.winery.roshard import Pool
 from swh.objstorage.backends.winery.sharedbase import ShardState
-from swh.objstorage.backends.winery.stats import Stats
 from swh.objstorage.factory import get_objstorage
 from swh.objstorage.interface import ObjStorageInterface
 from swh.objstorage.objstorage import objid_for_content
@@ -104,7 +103,6 @@ class Worker:
         assert isinstance(
             storage, WineryObjStorage
         ), f"winery_benchmark passed unexpected {storage.__class__.__name__}"
-        self.stats: Stats = Stats(storage.winery.args.get("output_dir"))
         self.storage: WineryObjStorage = storage
 
     def run(self, time_remaining: datetime.timedelta) -> WorkerKind:
@@ -391,8 +389,6 @@ class ROWorker(Worker):
                         break
                     content = self.storage.get(obj_id={"sha256": obj_id})
                     assert content is not None
-                    if self.stats.stats_active:
-                        self.stats.stats_read(obj_id, content)
 
         elapsed = time.monotonic() - start
         logger.info("Worker(ro, %s): finished (%.2fs)", os.getpid(), elapsed)
@@ -456,8 +452,6 @@ class RWWorker(Worker):
             content = random_content.read(random.choice(self.payloads))
             obj_id = objid_for_content(content)
             self.storage.add(content=content, obj_id=obj_id)
-            if self.stats.stats_active:
-                self.stats.stats_write(obj_id, content)
             self.count += 1
         self.finalize()
         elapsed = time.monotonic() - start
-- 
GitLab


From 6a432dd13ad580b36bfbf01829c4923647f7ed05 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Thu, 27 Feb 2025 13:54:03 +0100
Subject: [PATCH 02/25] Clean up winery benchmark tooling

We don't expect to use Grid'5000 again, so this code has not been
maintained with any expectation that it still works. Removing it also
streamlines future refactorings.

A future objstorage benchmark tool could take inspiration from much of
this code, but should be agnostic to the backend used.
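
As a rough, hypothetical sketch (not part of this change; the helper
name and payload sizes are made up), such a tool could drive any
backend through the generic factory and interface alone:

    import random
    import time

    from swh.objstorage.factory import get_objstorage
    from swh.objstorage.objstorage import objid_for_content

    def rw_worker(storage_config: dict, duration: float) -> int:
        """Write random payloads for `duration` seconds; return the count."""
        # storage_config must include "cls" (e.g. "pathslicing" or "winery"),
        # so this loop is not tied to any particular backend.
        storage = get_objstorage(**storage_config)
        payload_sizes = [3 * 1024 + 1, 16 * 1024 + 1, 80 * 1024 + 1]
        deadline = time.monotonic() + duration
        count = 0
        with open("/dev/urandom", "rb") as rng:
            while time.monotonic() < deadline:
                content = rng.read(random.choice(payload_sizes))
                storage.add(content=content, obj_id=objid_for_content(content))
                count += 1
        return count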
---
 .pre-commit-config.yaml                       |   2 -
 conftest.py                                   | 100 ---
 docs/winery.rst                               |   5 -
 pyproject.toml                                |   1 -
 .../tests/test_objstorage_winery.py           | 322 +---------
 swh/objstorage/tests/winery_benchmark.py      | 575 ------------------
 tox.ini                                       |   7 -
 winery-test-environment/README.md             |  90 ---
 winery-test-environment/ansible.cfg           |   9 -
 winery-test-environment/bootstrap.yml         |  31 -
 winery-test-environment/build-vms.sh          | 135 ----
 winery-test-environment/ceph.yml              |  93 ---
 winery-test-environment/fed4fire.rspec        |  59 --
 winery-test-environment/fed4fire.sh           |  34 --
 winery-test-environment/grid5000.yml          |  81 ---
 .../inventory/group_vars/all/rw.yml           |   5 -
 winery-test-environment/inventory/groups.yml  |  13 -
 winery-test-environment/inventory/hosts.yml   |  10 -
 winery-test-environment/inventory/osd.yml     |   9 -
 winery-test-environment/libvirt.yml           |   5 -
 .../mitogen-strategy/README.txt               |   1 -
 .../mitogen-strategy/__init__.py              |   0
 .../mitogen-strategy/mitogen.py               |  61 --
 .../mitogen-strategy/mitogen_free.py          |  62 --
 .../mitogen-strategy/mitogen_host_pinned.py   |  67 --
 .../mitogen-strategy/mitogen_linear.py        |  62 --
 winery-test-environment/osd.yml               |  40 --
 winery-test-environment/remote-tox.sh         |  43 --
 winery-test-environment/render-stats.py       |  59 --
 winery-test-environment/requirements.txt      |   5 -
 winery-test-environment/rng.xml               |   5 -
 winery-test-environment/rw.yml                | 111 ----
 winery-test-environment/tests.yml             |  32 -
 33 files changed, 4 insertions(+), 2130 deletions(-)
 delete mode 100644 swh/objstorage/tests/winery_benchmark.py
 delete mode 100644 winery-test-environment/README.md
 delete mode 100644 winery-test-environment/ansible.cfg
 delete mode 100644 winery-test-environment/bootstrap.yml
 delete mode 100755 winery-test-environment/build-vms.sh
 delete mode 100644 winery-test-environment/ceph.yml
 delete mode 100644 winery-test-environment/fed4fire.rspec
 delete mode 100755 winery-test-environment/fed4fire.sh
 delete mode 100644 winery-test-environment/grid5000.yml
 delete mode 100644 winery-test-environment/inventory/group_vars/all/rw.yml
 delete mode 100644 winery-test-environment/inventory/groups.yml
 delete mode 100644 winery-test-environment/inventory/hosts.yml
 delete mode 100644 winery-test-environment/inventory/osd.yml
 delete mode 100644 winery-test-environment/libvirt.yml
 delete mode 100644 winery-test-environment/mitogen-strategy/README.txt
 delete mode 100644 winery-test-environment/mitogen-strategy/__init__.py
 delete mode 100644 winery-test-environment/mitogen-strategy/mitogen.py
 delete mode 100644 winery-test-environment/mitogen-strategy/mitogen_free.py
 delete mode 100644 winery-test-environment/mitogen-strategy/mitogen_host_pinned.py
 delete mode 100644 winery-test-environment/mitogen-strategy/mitogen_linear.py
 delete mode 100644 winery-test-environment/osd.yml
 delete mode 100755 winery-test-environment/remote-tox.sh
 delete mode 100644 winery-test-environment/render-stats.py
 delete mode 100644 winery-test-environment/requirements.txt
 delete mode 100644 winery-test-environment/rng.xml
 delete mode 100644 winery-test-environment/rw.yml
 delete mode 100644 winery-test-environment/tests.yml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b4839f2..8a22c53 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,3 @@
-exclude: winery-test-environment/mitogen-strategy
-
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v5.0.0
diff --git a/conftest.py b/conftest.py
index 5adde39..5567392 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,5 +1,3 @@
-import sys
-
 import pytest
 
 pytest_plugins = ["swh.objstorage.pytest_plugin"]
@@ -15,9 +13,6 @@ def pytest_configure(config):
         "clean_immediately(bool): whether the winery packer should clean rw "
         "shards immediately",
     )
-    config.addinivalue_line(
-        "markers", "use_benchmark_flags: use the --winery-bench-* CLI flags"
-    )
     config.addinivalue_line(
         "markers",
         (
@@ -31,101 +26,6 @@ def pytest_configure(config):
 
 
 def pytest_addoption(parser):
-    if sys.version_info >= (3, 9):
-        import argparse
-
-        action = argparse.BooleanOptionalAction
-        default = True
-    else:
-        action = "store_true"
-        default = False
-
-    parser.addoption(
-        "--winery-bench-pack-immediately",
-        action=action,
-        help="Pack objects synchronously in benchmark",
-        default=default,
-    )
-
-    parser.addoption(
-        "--winery-bench-remove-pool",
-        action=action,
-        help="Remove Ceph pool before and after tests",
-        default=default,
-    )
-
-    parser.addoption(
-        "--winery-bench-remove-images",
-        action=action,
-        help="Remove Ceph images after tests",
-        default=default,
-    )
-
-    parser.addoption(
-        "--winery-bench-rbd-pool",
-        help="RBD pool for benchmark",
-        default="winery-benchmark-shards",
-    )
-
-    parser.addoption(
-        "--winery-bench-output-directory",
-        help="Directory in which the performance results are stored",
-        default=None,
-    )
-    parser.addoption(
-        "--winery-bench-rw-workers",
-        type=int,
-        help="Number of Read/Write workers",
-        default=1,
-    )
-    parser.addoption(
-        "--winery-bench-ro-workers",
-        type=int,
-        help="Number of Readonly workers",
-        default=1,
-    )
-    parser.addoption(
-        "--winery-bench-pack-workers",
-        type=int,
-        help="Number of Pack workers",
-        default=1,
-    )
-    parser.addoption(
-        "--winery-bench-duration",
-        type=int,
-        help="Duration of the benchmarks in seconds",
-        default=1,
-    )
-    parser.addoption(
-        "--winery-bench-shard-max-size",
-        type=int,
-        help="Size of the shard in bytes",
-        default=10 * 1024 * 1024,
-    )
-    parser.addoption(
-        "--winery-bench-stats-interval",
-        type=int,
-        help="Interval between stat computations (seconds)",
-        default=5 * 60,
-    )
-    parser.addoption(
-        "--winery-bench-ro-worker-max-request",
-        type=int,
-        help="Number of requests a ro worker performs",
-        default=1,
-    )
-    parser.addoption(
-        "--winery-bench-throttle-read",
-        type=int,
-        help="Maximum number of bytes per second read",
-        default=100 * 1024 * 1024,
-    )
-    parser.addoption(
-        "--winery-bench-throttle-write",
-        type=int,
-        help="Maximum number of bytes per second write",
-        default=100 * 1024 * 1024,
-    )
     parser.addoption(
         "--all-compression-methods",
         action="store_true",
diff --git a/docs/winery.rst b/docs/winery.rst
index 016fbe6..b121836 100644
--- a/docs/winery.rst
+++ b/docs/winery.rst
@@ -42,8 +42,3 @@ When a new object is added to the Write Shard, a new row is added to the global
 After the content of the object is successfully added to the Write Shard, the state of the record in the global index is modified to no longer be in flight. The client is notified that the operation was successful and the object can be read from the Write Shard from that point on.
 
 When the size of the database associated with a Write Shard exceeds a threshold, it is set to be in the `packing` state. All objects it contains can be read from it by any `WineryReader` but no new object will be added to it. A process is spawned and is tasked to transform it into a Read Shard using the `Packer` class. Should it fail for any reason, a cron job will restart it when it finds Write Shards that are both in the `packing` state and not locked by any process. Packing is done by enumerating all the records from the Write Shard database and writing them into a Read Shard by the same name. Incomplete Read Shards will never be used by `WineryReader` because the global index will direct it to use the Write Shard instead. Once the packing completes, the state of the shard is modified to be readonly and from that point on the `WineryReader` will only use the Read Shard to find the objects it contains. The database containing the Write Shard is then destroyed because it is no longer useful and the process terminates on success.
-
-Benchmarks
-----------
-
-Follow the instructions at winery-test-environment/README.md
diff --git a/pyproject.toml b/pyproject.toml
index 31ce31a..3927ad1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -110,7 +110,6 @@ ignore = [
 max-line-length = 88
 extend-exclude = [
     "build",
-    "winery-test-environment/mitogen-strategy",
 ]
 
 [tool.pytest.ini_options]
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index c2e737e..a238394 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -4,15 +4,12 @@
 # See top-level LICENSE file for more information
 
 from collections import Counter
-from dataclasses import asdict, dataclass
-import datetime
 from functools import partial
 import logging
 import os
 import shutil
 import threading
 import time
-from typing import Any, Dict
 
 from click.testing import CliRunner
 import pytest
@@ -43,19 +40,7 @@ from swh.objstorage.exc import ObjNotFoundError
 from swh.objstorage.factory import get_objstorage
 from swh.objstorage.objstorage import objid_for_content
 from swh.objstorage.tests.objstorage_testing import ObjStorageTestFixture
-from swh.objstorage.utils import call_async
-
-from .winery_benchmark import (
-    Bench,
-    PackWorker,
-    RBDWorker,
-    ROWorker,
-    RWShardCleanerWorker,
-    RWWorker,
-    StatsPrinter,
-    WorkerKind,
-    work,
-)
+
 from .winery_testing_helpers import FileBackedPool, PoolHelper
 
 logger = logging.getLogger(__name__)
@@ -89,34 +74,25 @@ def cli_runner(capsys):
 def remove_pool(request, pytestconfig):
     if os.environ.get("CEPH_HARDCODE_POOL"):
         return False
-    marker = request.node.get_closest_marker("use_benchmark_flags")
-    if marker is None:
+    else:
         return True
 
-    return pytestconfig.getoption("--winery-bench-remove-pool")
-
 
 @pytest.fixture
 def remove_images(request, pytestconfig):
     if os.environ.get("CEPH_HARDCODE_POOL"):
         return False
-    marker = request.node.get_closest_marker("use_benchmark_flags")
-    if marker is None:
+    else:
         return True
 
-    return pytestconfig.getoption("--winery-bench-remove-images")
-
 
 @pytest.fixture
 def rbd_pool_name(request, pytestconfig):
     if os.environ.get("CEPH_HARDCODE_POOL"):
         return os.environ["CEPH_HARDCODE_POOL"]
-    marker = request.node.get_closest_marker("use_benchmark_flags")
-    if marker is None:
+    else:
         return "winery-test-shards"
 
-    return pytestconfig.getoption("--winery-bench-rbd-pool")
-
 
 @pytest.fixture
 def rbd_map_options():
@@ -156,10 +132,6 @@ def file_backed_pool(mocker, tmp_path, shard_max_size, rbd_pool_name):
         "swh.objstorage.backends.winery.roshard.Pool",
         new=FileBackedPool,
     )
-    mocker.patch(
-        "swh.objstorage.tests.winery_benchmark.Pool",
-        new=FileBackedPool,
-    )
     pool = FileBackedPool(shard_max_size=10 * 1024 * 1024, rbd_pool_name=rbd_pool_name)
     pool.image_unmap_all()
     yield pool
@@ -1074,292 +1046,6 @@ def test_winery_ceph_pool(needs_ceph, rbd_map_options):
     assert pool.image_list() == []
 
 
-@pytest.mark.shard_max_size(10 * 1024 * 1024)
-def test_winery_bench_work_ro_rw(storage, image_pool, tmpdir):
-    #
-    # rw worker creates a shard
-    #
-    locked_shard = storage.winery.base.locked_shard
-    shards_info = list(storage.winery.base.list_shards())
-    assert shards_info == [(locked_shard, ShardState.WRITING)]
-    assert (
-        work("rw", storage=storage, time_remaining=datetime.timedelta(seconds=300))
-        == "rw"
-    )
-    shards_info = dict(storage.winery.base.list_shards())
-    assert shards_info[locked_shard].image_available
-    #
-    # ro worker reads a shard
-    #
-    args = {**storage.winery.args, "readonly": True}
-    assert (
-        work(
-            "ro",
-            storage=args,
-            worker_args={"ro": {"max_request": 1}},
-            time_remaining=datetime.timedelta(seconds=300),
-        )
-        == "ro"
-    )
-
-
-@pytest.mark.shard_max_size(10 * 1024 * 1024)
-def test_winery_bench_work_pack(storage, image_pool):
-    pack_args = {
-        "base_dsn": storage.winery.args["base_dsn"],
-        "shard_max_size": storage.winery.args["shard_max_size"],
-        "throttle_read": storage.winery.args["throttle_read"],
-        "throttle_write": storage.winery.args["throttle_write"],
-        "rbd_pool_name": image_pool.pool_name,
-    }
-    assert (
-        work(
-            "pack",
-            storage=storage,
-            worker_args={"pack": pack_args},
-            time_remaining=datetime.timedelta(seconds=300),
-        )
-        == "pack"
-    )
-
-
-@pytest.mark.shard_max_size(10 * 1024 * 1024)
-def test_winery_bench_work_rbd(storage, image_pool):
-    rbd_args = {
-        "base_dsn": storage.winery.args["base_dsn"],
-        "shard_max_size": storage.winery.args["shard_max_size"],
-        "duration": 1,
-        "rbd_pool_name": image_pool.pool_name,
-        "rbd_map_options": image_pool.map_options,
-    }
-    assert (
-        work(
-            "rbd",
-            storage=storage,
-            worker_args={"rbd": rbd_args},
-            time_remaining=datetime.timedelta(seconds=300),
-        )
-        == "rbd"
-    )
-
-
-@pytest.mark.shard_max_size(10 * 1024 * 1024)
-def test_winery_bench_work_rw_shard_cleaner(storage):
-    rw_shard_cleaner_args = {
-        "base_dsn": storage.winery.args["base_dsn"],
-    }
-    assert (
-        work(
-            "rw_shard_cleaner",
-            storage=storage,
-            worker_args={"rw_shard_cleaner": rw_shard_cleaner_args},
-            time_remaining=datetime.timedelta(seconds=300),
-        )
-        == "rw_shard_cleaner"
-    )
-
-
-@pytest.mark.shard_max_size(10 * 1024 * 1024)
-@pytest.mark.pack_immediately(False)
-def test_winery_bench_rw_object_limit(storage):
-    object_limit = 15
-    worker = RWWorker(
-        storage, object_limit=object_limit, single_shard=False, block_until_packed=False
-    )
-
-    assert worker.run(time_remaining=datetime.timedelta(seconds=300)) == "rw"
-
-    with storage.winery.base.pool.connection() as db:
-        c = db.execute("SELECT count(*) from signature2shard")
-        assert c.fetchone() == (object_limit,)
-
-
-@pytest.mark.shard_max_size(10 * 1024 * 1024)
-@pytest.mark.pack_immediately(True)
-def test_winery_bench_rw_block_until_packed(storage, image_pool):
-    worker = RWWorker(storage, single_shard=True, block_until_packed=False)
-
-    assert worker.run(time_remaining=datetime.timedelta(seconds=300)) == "rw"
-
-    packed = 0
-    for packer in storage.winery.packers:
-        packer.join()
-        assert packer.exitcode == 0
-        packed += 1
-
-    assert packed > 0, "did not have any packers to wait for"
-
-
-@pytest.mark.shard_max_size(1024 * 1024)
-@pytest.mark.pack_immediately(True)
-def test_winery_bench_rw_block_until_packed_multiple_shards(storage, image_pool):
-    # 1000 objects will create multiple shards when the limit is 1MB
-    worker = RWWorker(
-        storage, object_limit=1000, single_shard=False, block_until_packed=False
-    )
-
-    assert worker.run(time_remaining=datetime.timedelta(seconds=300)) == "rw"
-
-    packed = 0
-    for packer in storage.winery.packers:
-        packer.join()
-        assert packer.exitcode == 0
-        packed += 1
-
-    assert packed > 0, "did not have any packers to wait for"
-
-
-@dataclass
-class WineryBenchOptions:
-    storage_config: Dict[str, Any]
-    workers_per_kind: Dict[WorkerKind, int]
-    worker_args: Dict[WorkerKind, Dict]
-    duration: float
-
-
-@pytest.fixture
-def bench_options(
-    pytestconfig, postgresql_dsn, rbd_map_options, tmpdir
-) -> WineryBenchOptions:
-    output_dir = pytestconfig.getoption("--winery-bench-output-directory")
-    shard_max_size = pytestconfig.getoption("--winery-bench-shard-max-size")
-    pack_immediately = pytestconfig.getoption("--winery-bench-pack-immediately")
-    duration = pytestconfig.getoption("--winery-bench-duration")
-
-    if not output_dir:
-        output_dir = str(tmpdir)
-
-    storage_config = {
-        "output_dir": output_dir,
-        "shard_max_size": shard_max_size,
-        "pack_immediately": pack_immediately,
-        "base_dsn": postgresql_dsn,
-        "throttle_read": pytestconfig.getoption("--winery-bench-throttle-read"),
-        "throttle_write": pytestconfig.getoption("--winery-bench-throttle-write"),
-        "rbd_pool_name": pytestconfig.getoption("--winery-bench-rbd-pool"),
-    }
-    workers_per_kind: Dict[WorkerKind, int] = {
-        "ro": pytestconfig.getoption("--winery-bench-ro-workers"),
-        "rw": pytestconfig.getoption("--winery-bench-rw-workers"),
-        "stats": (
-            1 if pytestconfig.getoption("--winery-bench-stats-interval") > 0 else 0
-        ),
-    }
-    worker_args: Dict[WorkerKind, Dict] = {
-        "ro": {
-            "max_request": pytestconfig.getoption(
-                "--winery-bench-ro-worker-max-request"
-            )
-        },
-        "pack": {
-            "base_dsn": postgresql_dsn,
-            "output_dir": output_dir,
-            "shard_max_size": shard_max_size,
-            "rbd_create_images": False,
-            "rbd_pool_name": pytestconfig.getoption("--winery-bench-rbd-pool"),
-            "throttle_read": pytestconfig.getoption("--winery-bench-throttle-read"),
-            "throttle_write": pytestconfig.getoption("--winery-bench-throttle-write"),
-        },
-        "stats": {
-            "base_dsn": postgresql_dsn,
-            "shard_max_size": shard_max_size,
-            "interval": pytestconfig.getoption("--winery-bench-stats-interval"),
-        },
-        "rw_shard_cleaner": {
-            "base_dsn": postgresql_dsn,
-        },
-        "rbd": {
-            "base_dsn": postgresql_dsn,
-            "shard_max_size": shard_max_size,
-            "rbd_pool_name": pytestconfig.getoption("--winery-bench-rbd-pool"),
-            "rbd_map_options": rbd_map_options,
-            "duration": duration,
-        },
-    }
-
-    if not pack_immediately:
-        worker_args["rw"] = {"block_until_packed": False}
-        workers_per_kind["pack"] = pytestconfig.getoption("--winery-bench-pack-workers")
-        workers_per_kind["rw_shard_cleaner"] = 1
-        workers_per_kind["rbd"] = 1
-
-    return WineryBenchOptions(
-        storage_config,
-        workers_per_kind,
-        worker_args,
-        duration,
-    )
-
-
-# Only run this one on a Ceph image pool as we won’t be running “real”
-# benchmarks using the file-backed backend.
-@pytest.mark.use_benchmark_flags
-def test_winery_bench_real(bench_options, ceph_pool):
-    count = call_async(Bench(**asdict(bench_options)).run)
-    assert count > 0
-
-
-@pytest.mark.use_benchmark_flags
-def test_winery_bench_fake(bench_options, mocker):
-    class _ROWorker(ROWorker):
-        def run(self, time_remaining: datetime.timedelta):
-            logger.info(
-                "running ro for %s, time remaining: %s",
-                bench_options.duration,
-                time_remaining,
-            )
-            return "ro"
-
-    class _RWWorker(RWWorker):
-        def run(self, time_remaining: datetime.timedelta):
-            logger.info(
-                "running rw for %s, time remaining: %s",
-                bench_options.duration,
-                time_remaining,
-            )
-            return "rw"
-
-    class _PackWorker(PackWorker):
-        def run(self):
-            logger.info("running pack for %s", bench_options.duration)
-            return "pack"
-
-    class _RBDWorker(RBDWorker):
-        def run(self):
-            logger.info("running rbd for %s", bench_options.duration)
-            return "rbd"
-
-    class _RWShardCleanerWorker(RWShardCleanerWorker):
-        def run(self):
-            logger.info("running rw_shard_cleaner for %s", bench_options.duration)
-            return "rw_shard_cleaner"
-
-    class _StatsPrinter(StatsPrinter):
-        def run(self, time_remaining: datetime.timedelta):
-            logger.info(
-                "running stats for %s, remaining: %s",
-                bench_options.duration,
-                time_remaining,
-            )
-            return "stats"
-
-    mocker.patch("swh.objstorage.tests.winery_benchmark.ROWorker", _ROWorker)
-    mocker.patch("swh.objstorage.tests.winery_benchmark.RWWorker", _RWWorker)
-    mocker.patch("swh.objstorage.tests.winery_benchmark.PackWorker", _PackWorker)
-    mocker.patch("swh.objstorage.tests.winery_benchmark.RBDWorker", _RBDWorker)
-    mocker.patch(
-        "swh.objstorage.tests.winery_benchmark.RWShardCleanerWorker",
-        _RWShardCleanerWorker,
-    )
-    mocker.patch("swh.objstorage.tests.winery_benchmark.StatsPrinter", _StatsPrinter)
-    mocker.patch(
-        "swh.objstorage.tests.winery_benchmark.Bench.timeout", side_effect=lambda: True
-    )
-
-    count = call_async(Bench(**asdict(bench_options)).run)
-    assert count == sum(bench_options.workers_per_kind.values())
-
-
 def test_winery_leaky_bucket_tick(mocker):
     total = 100
     half = 50
diff --git a/swh/objstorage/tests/winery_benchmark.py b/swh/objstorage/tests/winery_benchmark.py
deleted file mode 100644
index 3e8dc13..0000000
--- a/swh/objstorage/tests/winery_benchmark.py
+++ /dev/null
@@ -1,575 +0,0 @@
-# Copyright (C) 2021-2025  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import asyncio
-from collections import Counter
-import concurrent.futures
-import datetime
-import logging
-from multiprocessing import current_process
-import os
-import random
-import sys
-import time
-from typing import Any, Dict, Literal, Optional, Set, Union
-
-import psycopg
-import psycopg_pool
-
-from swh.objstorage.backends.winery.objstorage import (
-    WineryObjStorage,
-    WineryReader,
-    WineryWriter,
-    rw_shard_cleaner,
-    shard_packer,
-)
-from swh.objstorage.backends.winery.roshard import Pool
-from swh.objstorage.backends.winery.sharedbase import ShardState
-from swh.objstorage.factory import get_objstorage
-from swh.objstorage.interface import ObjStorageInterface
-from swh.objstorage.objstorage import objid_for_content
-
-logger = logging.getLogger(__name__)
-
-WorkerKind = Literal["ro", "rw", "pack", "rbd", "rw_shard_cleaner", "stats"]
-
-
-def work(
-    kind: WorkerKind,
-    storage: Union[ObjStorageInterface, Dict[str, Any]],
-    time_remaining: datetime.timedelta,
-    worker_args: Optional[Dict[WorkerKind, Any]] = None,
-    worker_id: int = 0,
-) -> WorkerKind:
-    if not worker_args:
-        worker_args = {}
-
-    kind_args = worker_args.get(kind, {})
-
-    process_name = f"Worker-{kind}-{worker_id}"
-    process = current_process()
-    if process and process.name != "MainProcess":
-        process.name = process_name
-
-    logger.info("Started process %s", process_name)
-
-    application_name = f"Winery Benchmark {process_name}"
-
-    if kind == "ro":
-        try:
-            if isinstance(storage, dict):
-                storage = get_objstorage(
-                    cls="winery",
-                    application_name=application_name,
-                    **{
-                        **storage,
-                        "readonly": True,
-                    },
-                )
-            return ROWorker(storage, **kind_args).run(time_remaining=time_remaining)
-        finally:
-            if isinstance(storage, WineryObjStorage):
-                storage.on_shutdown()
-    elif kind == "rw":
-        try:
-            if isinstance(storage, dict):
-                storage = get_objstorage(
-                    cls="winery", application_name=application_name, **storage
-                )
-            return RWWorker(storage, **kind_args).run(time_remaining=time_remaining)
-        finally:
-            if isinstance(storage, WineryObjStorage):
-                storage.on_shutdown()
-    elif kind == "pack":
-        return PackWorker(application_name=application_name, **kind_args).run()
-    elif kind == "rbd":
-        return RBDWorker(application_name=application_name, **kind_args).run()
-    elif kind == "rw_shard_cleaner":
-        return RWShardCleanerWorker(
-            application_name=application_name, **kind_args
-        ).run()
-    elif kind == "stats":
-        return StatsPrinter(application_name=application_name, **kind_args).run(
-            time_remaining=time_remaining
-        )
-    else:
-        raise ValueError("Unknown worker kind: %s" % kind)
-
-
-class Worker:
-    def __init__(self, storage: ObjStorageInterface):
-        assert isinstance(
-            storage, WineryObjStorage
-        ), f"winery_benchmark passed unexpected {storage.__class__.__name__}"
-        self.storage: WineryObjStorage = storage
-
-    def run(self, time_remaining: datetime.timedelta) -> WorkerKind:
-        raise NotImplementedError
-
-
-class PackWorker:
-    def __init__(
-        self,
-        base_dsn: str,
-        shard_max_size: int,
-        throttle_read: int,
-        throttle_write: int,
-        application_name: Optional[str] = None,
-        rbd_create_images: bool = True,
-        rbd_pool_name: str = "shards",
-        output_dir: Optional[str] = None,
-    ):
-        self.base_dsn = base_dsn
-        self.shard_max_size = shard_max_size
-        self.output_dir = output_dir
-        self.throttle_read = throttle_read
-        self.throttle_write = throttle_write
-        self.rbd_create_images = rbd_create_images
-        self.rbd_pool_name = rbd_pool_name
-        self.application_name = application_name
-        self.waited = 0
-
-    def stop_packing(self, shards_count: int) -> bool:
-        return shards_count >= 1 or self.waited > 60
-
-    def wait_for_shard(self, attempt: int) -> None:
-        if self.waited > 60:
-            raise ValueError("Shard waited for too long")
-        time.sleep(0.1)
-        self.waited += 1
-
-    def run(self) -> Literal["pack"]:
-        shard_packer(
-            base_dsn=self.base_dsn,
-            shard_max_size=self.shard_max_size,
-            throttle_read=self.throttle_read,
-            throttle_write=self.throttle_write,
-            rbd_pool_name=self.rbd_pool_name,
-            rbd_create_images=self.rbd_create_images,
-            rbd_wait_for_image=self.wait_for_shard,
-            output_dir=self.output_dir,
-            stop_packing=self.stop_packing,
-            wait_for_shard=self.wait_for_shard,
-            application_name=self.application_name,
-        )
-        return "pack"
-
-
-class RBDWorker:
-    def __init__(
-        self,
-        base_dsn: str,
-        rbd_pool_name: str,
-        rbd_map_options: str,
-        shard_max_size: int,
-        application_name: Optional[str] = None,
-        duration: int = 10,
-    ):
-        self.base_dsn = base_dsn
-        self.pool = Pool(
-            shard_max_size=shard_max_size,
-            rbd_pool_name=rbd_pool_name,
-            rbd_map_options=rbd_map_options,
-        )
-        self.duration = duration
-        self.started = time.monotonic()
-        self.application_name = application_name
-        self.waited = 0
-
-    def wait_for_shard(self, attempt: int) -> None:
-        time.sleep(1)
-        self.waited += 1
-
-    def stop_running(self) -> bool:
-        return time.monotonic() > self.started + self.duration or self.waited > 5
-
-    def run(self) -> Literal["rbd"]:
-        self.pool.manage_images(
-            base_dsn=self.base_dsn,
-            manage_rw_images=True,
-            wait_for_image=self.wait_for_shard,
-            stop_running=self.stop_running,
-            application_name=self.application_name,
-        )
-        return "rbd"
-
-
-class RWShardCleanerWorker:
-    def __init__(
-        self,
-        base_dsn: str,
-        min_mapped_hosts: int = 1,
-        application_name: Optional[str] = None,
-        duration: int = 10,
-    ):
-        self.base_dsn = base_dsn
-        self.min_mapped_hosts = min_mapped_hosts
-        self.application_name = application_name
-        self.duration = duration
-        self.started = time.monotonic()
-        self.waited = 0
-
-    def stop_cleaning(self, num_cleaned: int) -> bool:
-        return num_cleaned >= 1 or self.waited > 5
-
-    def wait_for_shard(self, attempt: int) -> None:
-        time.sleep(1)
-        self.waited += 1
-
-    def run(self) -> Literal["rw_shard_cleaner"]:
-        rw_shard_cleaner(
-            base_dsn=self.base_dsn,
-            min_mapped_hosts=self.min_mapped_hosts,
-            stop_cleaning=self.stop_cleaning,
-            wait_for_shard=self.wait_for_shard,
-            application_name=self.application_name,
-        )
-        return "rw_shard_cleaner"
-
-
-class StatsPrinter:
-    def __init__(
-        self,
-        base_dsn: str,
-        shard_max_size: int,
-        application_name: Optional[str] = None,
-        interval: int = 5 * 60,
-    ):
-        self.base_dsn = base_dsn
-        self.shard_max_size = shard_max_size
-        self.interval = datetime.timedelta(seconds=interval)
-        self.application_name = application_name or "Winery Benchmark Stats Printer"
-        self.objects_per_shard: Dict[str, int] = {}
-
-    def get_winery_reader(self) -> WineryReader:
-        return WineryReader(
-            base_dsn=self.base_dsn,
-            shard_max_size=self.shard_max_size,
-            application_name=self.application_name,
-        )
-
-    def run(self, time_remaining: datetime.timedelta) -> Literal["stats"]:
-        try:
-            return self._run(time_remaining)
-        except Exception:
-            logger.exception("StatsPrinter.run raised exception")
-            return "stats"
-
-    def _run(self, time_remaining: datetime.timedelta) -> Literal["stats"]:
-        sleep = min(time_remaining, self.interval).total_seconds()
-        if sleep > 1:
-            time.sleep(sleep)
-
-        winery = self.get_winery_reader()
-        shards = list(winery.base.list_shards())
-        shard_counts: Counter[ShardState] = Counter()
-
-        printed_rw_header = False
-
-        for shard_name, _ in shards:
-            # Get a fresh version of the state again to try and avoid a race
-            state = winery.base.get_shard_state(shard_name)
-            shard_counts[state] += 1
-            if state not in {ShardState.STANDBY, ShardState.WRITING}:
-                if shard_name not in self.objects_per_shard:
-                    self.objects_per_shard[shard_name] = winery.base.count_objects(
-                        shard_name
-                    )
-            else:
-                if not printed_rw_header:
-                    logger.info("read-write shard stats:")
-                    printed_rw_header = True
-
-                objects = winery.base.count_objects(shard_name)
-                try:
-                    shard = winery.rwshard(shard_name)
-                    size = shard.size
-                except psycopg_pool.PoolTimeout:
-                    logger.info(
-                        "Shard %s got eaten by the rw shard cleaner, sorry", shard_name
-                    )
-                    size = 0
-                logger.info(
-                    " shard %s (state: %s): objects: %s, total_size: %.1f GiB (%2.1f%%)",
-                    shard_name,
-                    state.name,
-                    objects,
-                    size / (1024 * 1024 * 1024),
-                    100 * size / self.shard_max_size,
-                )
-
-        logger.info(
-            "Read-only shard stats: count: %s, objects: %s, total_size (est.): %.1f GiB",
-            len(self.objects_per_shard),
-            sum(self.objects_per_shard.values()),
-            (len(self.objects_per_shard) * self.shard_max_size) / (1024 * 1024 * 1024),
-        )
-        logger.info(
-            "Shard counts: %s",
-            ", ".join(f"{state.name}: {shard_counts[state]}" for state in ShardState),
-        )
-
-        return "stats"
-
-
-class ROWorker(Worker):
-    def __init__(self, storage: ObjStorageInterface, max_request: int = 1000) -> None:
-        super().__init__(storage)
-
-        if not isinstance(self.storage.winery, WineryReader):
-            raise ValueError(
-                f"Running ro benchmark on {self.storage.winery.__class__.__name__}"
-                ", expected read-only"
-            )
-
-        self.winery: WineryReader = self.storage.winery
-        self.max_request = max_request
-
-    def run(self, time_remaining: datetime.timedelta) -> Literal["ro"]:
-        try:
-            self._ro(time_remaining)
-        except psycopg.OperationalError:
-            # It may happen when the database is dropped, just
-            # conclude the read loop gracefully and move on
-            logger.exception("RO worker got exception...")
-        finally:
-            self.finalize()
-
-        return "ro"
-
-    def _ro(self, time_remaining: datetime.timedelta):
-        cutoff = time.time() + time_remaining.total_seconds()
-        remaining = self.max_request
-
-        start = time.monotonic()
-        tablesample = 0.1
-        random_cutoff = 0.1
-        while remaining:
-            if time.time() > cutoff:
-                break
-            with self.storage.winery.base.pool.connection() as db:
-                limit = min(remaining, 1000)
-                c = db.execute(
-                    """
-                    WITH selected AS (
-                      SELECT signature, random() r
-                      FROM signature2shard TABLESAMPLE BERNOULLI (%s)
-                      WHERE state = 'present' and random() < %s
-                      LIMIT %s)
-                    SELECT signature FROM selected ORDER BY r
-                    """,
-                    (
-                        tablesample,
-                        random_cutoff,
-                        limit,
-                    ),
-                )
-
-                if c.rowcount == 0:
-                    logger.info(
-                        "Worker(ro, %s): empty (tablesample=%s, random_cutoff=%s), sleeping",
-                        os.getpid(),
-                        tablesample,
-                        random_cutoff,
-                    )
-                    tablesample = min(tablesample * 10, 100)
-                    random_cutoff = min(random_cutoff * 3, 1)
-                    time.sleep(1)
-                    continue
-                elif c.rowcount == limit:
-                    tablesample = max(tablesample / 10, 0.1)
-                    random_cutoff = max(random_cutoff / 3, 0.1)
-
-                for (obj_id,) in c:
-                    remaining -= 1
-                    if time.time() > cutoff:
-                        remaining = 0
-                        break
-                    content = self.storage.get(obj_id={"sha256": obj_id})
-                    assert content is not None
-
-        elapsed = time.monotonic() - start
-        logger.info("Worker(ro, %s): finished (%.2fs)", os.getpid(), elapsed)
-
-    def finalize(self):
-        self.storage.on_shutdown()
-
-
-class RWWorker(Worker):
-    """A read-write benchmark worker
-
-    Args:
-      storage: the read-write storage used
-      object_limit: the number of objects written before stopping
-      single_shard: stop when the worker switches to a new shard
-      block_until_packed: whether to wait for shards to be packed before exiting
-    """
-
-    def __init__(
-        self,
-        storage: ObjStorageInterface,
-        object_limit: Optional[int] = None,
-        single_shard: bool = True,
-        block_until_packed: bool = True,
-    ) -> None:
-        super().__init__(storage)
-
-        if not isinstance(self.storage.winery, WineryWriter):
-            raise ValueError(
-                f"Running rw benchmark on {self.storage.winery.__class__.__name__}"
-                ", expected read-write"
-            )
-
-        self.winery: WineryWriter = self.storage.winery
-        self.object_limit = object_limit
-        self.single_shard = single_shard
-        self.block_until_packed = block_until_packed
-        self.count = 0
-
-    def payloads_define(self):
-        self.payloads = [
-            3 * 1024 + 1,
-            3 * 1024 + 1,
-            3 * 1024 + 1,
-            3 * 1024 + 1,
-            3 * 1024 + 1,
-            10 * 1024 + 1,
-            13 * 1024 + 1,
-            16 * 1024 + 1,
-            70 * 1024 + 1,
-            80 * 1024 + 1,
-        ]
-
-    def run(self, time_remaining: datetime.timedelta) -> Literal["rw"]:
-        end = time.monotonic() + time_remaining.total_seconds()
-        self.payloads_define()
-        random_content = open("/dev/urandom", "rb")
-        logger.info("Worker(rw, %s): start", os.getpid())
-        start = time.monotonic()
-        while self.keep_going() and time.monotonic() < end:
-            content = random_content.read(random.choice(self.payloads))
-            obj_id = objid_for_content(content)
-            self.storage.add(content=content, obj_id=obj_id)
-            self.count += 1
-        self.finalize()
-        elapsed = time.monotonic() - start
-        logger.info("Worker(rw, %s): finished (%.2fs)", os.getpid(), elapsed)
-
-        return "rw"
-
-    def keep_going(self) -> bool:
-        if self.object_limit is not None and self.count >= self.object_limit:
-            return False
-        if self.single_shard and self.winery.shards_filled:
-            return False
-
-        return True
-
-    def finalize(self):
-        self.storage.on_shutdown()
-
-        if not self.block_until_packed:
-            return
-        logger.info(
-            "Worker(rw, %s): waiting for %s objects to be packed",
-            os.getpid(),
-            self.count,
-        )
-        for packer in self.winery.packers:
-            packer.join()
-            assert packer.exitcode == 0
-
-
-class Bench(object):
-    def __init__(
-        self,
-        storage_config: Union[ObjStorageInterface, Dict[str, Any]],
-        duration: int,
-        workers_per_kind: Dict[WorkerKind, int],
-        worker_args: Optional[Dict[WorkerKind, Any]] = None,
-    ) -> None:
-        self.storage_config = storage_config
-        self.duration = duration
-        self.workers_per_kind = workers_per_kind
-        self.worker_args = worker_args or {}
-        self.start = 0
-
-    def timer_start(self):
-        self.start = time.monotonic()
-
-    def timeout(self) -> bool:
-        return time.monotonic() - self.start > self.duration
-
-    def time_remaining(self) -> datetime.timedelta:
-        return datetime.timedelta(seconds=self.start + self.duration - time.monotonic())
-
-    async def run(self) -> int:
-        self.timer_start()
-
-        loop = asyncio.get_running_loop()
-
-        workers_count = sum(self.workers_per_kind.values())
-
-        with concurrent.futures.ProcessPoolExecutor(
-            max_workers=workers_count
-        ) as executor:
-            logger.info("Running winery benchmark")
-
-            self.count = 0
-            workers: "Set[asyncio.Future[WorkerKind]]" = set()
-
-            def create_worker(kind: WorkerKind) -> "asyncio.Future[WorkerKind]":
-                self.count += 1
-                logger.info("launched %s worker number %s", kind, self.count)
-                return loop.run_in_executor(
-                    executor,
-                    work,
-                    kind,
-                    self.storage_config,
-                    self.time_remaining(),
-                    self.worker_args,
-                    self.count,
-                )
-
-            for kind, count in self.workers_per_kind.items():
-                for _ in range(count):
-                    workers.add(create_worker(kind))
-
-            while len(workers) > 0:
-                logger.info(
-                    "Waiting for %s workers",
-                    ", ".join(
-                        f"{v} {k}" for k, v in self.workers_per_kind.items() if v
-                    ),
-                )
-                current = workers
-                done, pending = await asyncio.wait(
-                    current, return_when=asyncio.FIRST_COMPLETED
-                )
-                workers = pending
-                exceptions = list(filter(None, [task.exception() for task in done]))
-                if exceptions:
-                    for task in pending:
-                        task.cancel()
-                    if sys.version_info >= (3, 11):
-                        raise BaseExceptionGroup(  # noqa: F821
-                            "Some workers raised an exception", exceptions
-                        )
-                    else:
-                        for exc in exceptions:
-                            logger.error("Worker raised an exception", exc_info=exc)
-                        raise exceptions[0]
-
-                for task in done:
-                    kind = task.result()
-                    logger.info("worker %s complete", kind)
-                    if not self.timeout():
-                        workers.add(create_worker(kind))
-                    else:
-                        self.workers_per_kind[kind] -= 1
-
-            logger.info("Bench.run: finished")
-
-        return self.count
diff --git a/tox.ini b/tox.ini
index 66b7daf..0c8169a 100644
--- a/tox.ini
+++ b/tox.ini
@@ -5,7 +5,6 @@ envlist =
   flake8
   mypy
   py3
-  winery
 
 [testenv]
 usedevelop = true
@@ -23,12 +22,6 @@ commands =
          swh/objstorage \
          {posargs}
 
-[testenv:winery]
-allowlist_externals = bash
-passenv = *
-commands =
-  bash {toxinidir}/winery-test-environment/remote-tox.sh {posargs}
-
 [testenv:black]
 skip_install = true
 deps =
diff --git a/winery-test-environment/README.md b/winery-test-environment/README.md
deleted file mode 100644
index e488f4f..0000000
--- a/winery-test-environment/README.md
+++ /dev/null
@@ -1,90 +0,0 @@
-This purpose of these instructions is to run `tox run -e py3` in an
-environment that has access to a ceph cluster. It enables tests that
-would be otherwise be skipped and increases code coverage.
-
-The environment is composed of eight machines named ceph1 to ceph8.
-
-# Installation
-
-* pip install -r requirements.txt
-* ansible-galaxy install geerlingguy.docker
-
-# Create the machines
-
-## libvirt
-
-* ensure virsh is available
-* ./build-vms.sh
-
-If the internet cnx is slow it may take a while before the OSD show up
-because they require downloading large docker images.
-
-## fed4fire
-
-### Create a base rspec specification.
-
-* /opt/jFed/jFed-Experimenter
-* In the General Tab
-* Create an experiment (New)
-* Add one Physical Node by dragging it
-* Right click on the node and choose "Configure Node"
-* Select testbed: Grid 5000
-* Node => Specific hardware type: dahu-grenoble
-* Disk image => Bullseye base
-* Save under sample.rspec
-* Manually edit to duplicate the nodes
-
-### Run the experiment.
-
-* /opt/jFed/jFed-Experimenter
-* In the General Tab
-* Open Local and load winery-test-environment/fed4fire.rspec
-* Edit ceph1 node to check if the Specific hardware type is dahu-grenoble
-* Click on Topology Viewer
-* Run
-* Give a unique name to the experiment
-* Start experiment
-* Once the provisionning is complete (Testing connectivity to resources on Grid5000) click "Export As"
-* Choose "Export Configuration Management Settings"
-* Save under /tmp/test.zip
-* fed4fire.sh test.zip
-
-# Install the machines
-
-* ansible-playbook -i inventory context/setup.yml ceph.yml bootstrap.yml osd.yml tests.yml
-
-# Run the tests
-
-It copies the content of the repository and "ssh ceph1 tox run -e py3"
-
-* tox run -e winery
-
-# Login into a machine
-
-For each host found in context/ssh-config
-
-* ssh -i context/cluster_key -F context/ssh-config ceph1
-
-# Run the benchmarks
-
-The `tox run -e winery` command is used to run the benchmarks with the desired parameters. Upon completion the raw data can be found in the `winery-test-environment/context/stats` directory and is displayed on the standard output as well as rendered in a graph, if a display is available (see the `winery-test-environment/render-stats.py` for the details).
-
-### Example
-
-* tox run -e winery -- -s --log-cli-level=INFO -vvv -k test_winery_bench_real --winery-bench-duration 30  --winery-shard-max-size $((10 * 1024 * 1024)) --winery-bench-ro-worker-max-request 2000
-
-### Get all benchmark flags
-
-Run the following command and look for flags that start with `--winery-bench-`
-
-* tox run -e winery -- --help
-
-# Destroy
-
-## libvirt
-
-* ./build-vms.sh stop $(seq 1 8)
-
-## fed4fire
-
-It will expire on its own
diff --git a/winery-test-environment/ansible.cfg b/winery-test-environment/ansible.cfg
deleted file mode 100644
index 23d33d2..0000000
--- a/winery-test-environment/ansible.cfg
+++ /dev/null
@@ -1,9 +0,0 @@
-[defaults]
-strategy_plugins = mitogen-strategy
-strategy = mitogen_linear
-private_key_file = ./context/cluster_key
-host_key_checking = false
-
-[ssh_connection]
-ssh_args = -F context/ssh-config
-scp_if_ssh = True
diff --git a/winery-test-environment/bootstrap.yml b/winery-test-environment/bootstrap.yml
deleted file mode 100644
index 530f6c5..0000000
--- a/winery-test-environment/bootstrap.yml
+++ /dev/null
@@ -1,31 +0,0 @@
-- hosts: mon
-  gather_facts: no
-  become: true
-
-  tasks:
-
-    - name: scp context/ceph_key.*
-      copy:
-        src: "context/{{ item }}"
-        dest: "{{ item }}"
-      loop:
-        - ceph_key
-        - ceph_key.pub
-
-    - name: cephadm bootstrap
-      shell: |
-        set -ex
-        cephadm bootstrap --mon-ip {{ hostvars[groups['mon'][0]]['ansible_default_ipv4']['address'] }}
-        cephadm shell ceph cephadm clear-key
-        ceph config-key set mgr/cephadm/ssh_identity_key -i ceph_key
-        ceph config-key set mgr/cephadm/ssh_identity_pub -i ceph_key.pub
-        ceph orch apply osd --all-available-devices
-      args:
-        creates: /etc/ceph/ceph.pub
-
-    - name: cephadm shell ceph mgr fail
-      shell: |
-        set -ex
-        ceph config set mon mon_allow_pool_delete true
-        # does not work for some reason: must be done manually
-        cephadm shell ceph mgr fail # required for mgr/cephadm/ssh_identity* to be refreshed
diff --git a/winery-test-environment/build-vms.sh b/winery-test-environment/build-vms.sh
deleted file mode 100755
index 737559a..0000000
--- a/winery-test-environment/build-vms.sh
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/bin/bash
-
-# Copyright (C) 2021  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-set -e
-
-: ${LIBVIRT_URI:=qemu:///system}
-VIRSH="virsh --connect $LIBVIRT_URI"
-VIRT_INSTALL="virt-install --connect $LIBVIRT_URI"
-
-function ssh_key() {
-    if ! test -f cluster_key; then
-	ssh-keygen -f cluster_key -N '' -t rsa
-    fi
-    chmod 600 cluster_key
-}
-
-function stop() {
-    local ids="$@"
-
-    for id in $ids ; do
-	$VIRSH destroy ceph$id >& /dev/null || true
-	$VIRSH undefine ceph$id >& /dev/null || true
-	rm -f ceph$id.qcow2
-	rm -f disk$id*.img
-    done
-    $VIRSH net-destroy ceph >& /dev/null || true
-    $VIRSH net-undefine ceph >& /dev/null || true
-}
-
-function start() {
-    local ids="$@"
-
-    ssh_key
-    > ssh-config
-
-    if ! test -f debian-11.qcow2 ; then
-	sudo virt-builder debian-11 --output debian-11.qcow2 --size 10G --format qcow2 --install sudo --run-command 'dpkg-reconfigure --frontend=noninteractive openssh-server' --run-command 'useradd -s /bin/bash -m debian || true ; echo "debian ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-debian'  --ssh-inject debian:file:cluster_key.pub --edit '/etc/network/interfaces: s/ens2/enp1s0/'
-    fi
-
-    if ! $VIRSH net-list --name | grep ceph ; then
-	cat > ceph-net.xml <<EOF
-            <network>
-              <name>ceph</name>
-              <forward mode='nat'/>
-              <bridge name='virbrceph' stp='on' delay='0'/>
-              <ip address='10.11.12.1' netmask='255.255.255.0'>
-                <dhcp>
-                  <range start='10.11.12.100' end='10.11.12.200'/>
-		  <host mac='52:54:00:00:00:01' name='ceph1' ip='10.11.12.211'/>
-		  <host mac='52:54:00:00:00:02' name='ceph2' ip='10.11.12.212'/>
-		  <host mac='52:54:00:00:00:03' name='ceph3' ip='10.11.12.213'/>
-		  <host mac='52:54:00:00:00:04' name='ceph4' ip='10.11.12.214'/>
-		  <host mac='52:54:00:00:00:05' name='ceph5' ip='10.11.12.215'/>
-		  <host mac='52:54:00:00:00:06' name='ceph6' ip='10.11.12.216'/>
-		  <host mac='52:54:00:00:00:07' name='ceph7' ip='10.11.12.217'/>
-		  <host mac='52:54:00:00:00:08' name='ceph8' ip='10.11.12.218'/>
-		  <host mac='52:54:00:00:00:09' name='ceph9' ip='10.11.12.219'/>
-                </dhcp>
-              </ip>
-            </network>
-EOF
-	$VIRSH net-define ceph-net.xml
-	$VIRSH net-start ceph
-    fi
-
-
-    for id in $ids ; do
-	$VIRSH destroy ceph$id >& /dev/null || true
-	$VIRSH undefine ceph$id >& /dev/null || true
-	rm -f ceph$id.qcow2
-	cp --sparse=always debian-11.qcow2 ceph$id.qcow2
-	sudo virt-sysprep -a ceph$id.qcow2 --enable customize --hostname ceph$id
-	$VIRT_INSTALL --network network=ceph,mac=52:54:00:00:00:0$id --boot hd --name ceph$id --memory 2048 --vcpus 1 --cpu host --disk path=$(pwd)/ceph$id.qcow2,bus=virtio,format=qcow2 --os-type=linux --os-variant=debian10 --graphics none --noautoconsole
-	case $id in
-	    1)
-		;;
-	    2)
-		$VIRSH detach-device ceph$id ../rng.xml --live
-		for drive in b c ; do
-		    #
-		    # Without the sleep it fails with:
-		    #
-		    # error: Failed to attach disk
-		    # error: internal error: No more available PCI slots
-		    #
-		    sleep 10
-		    rm -f disk$id$drive.img
-		    qemu-img create -f raw disk$id$drive.img 20G
-		    sudo chown libvirt-qemu disk$id$drive.img
-		    $VIRSH attach-disk ceph$id --source $(pwd)/disk$id$drive.img --target vd$drive --persistent
-		done
-		;;
-	    *)
-		rm -f disk$id.img
-		qemu-img create -f raw disk$id.img 20G
-		sudo chown libvirt-qemu disk$id.img
-		$VIRSH attach-disk ceph$id --source $(pwd)/disk$id.img --target vdb --persistent
-		;;
-	esac
-	cat >> ssh-config <<EOF
-Host ceph$id
-    HostName 10.11.12.21$id
-    Port 22
-    User debian
-    IdentityFile $(pwd)/cluster_key
-    IdentityAgent none
-    ForwardAgent yes
-    TCPKeepAlive            yes
-    Compression             no
-    CheckHostIP no
-    StrictHostKeyChecking no
-EOF
-    done
-}
-
-function restart() {
-    local ids="$@"
-    stop $ids
-    start $ids
-}
-
-cd $(dirname $0)
-mkdir -p context
-ln -sf $(pwd)/libvirt.yml context/setup.yml
-cd context
-
-if test "$1" ; then
-    "$@"
-else
-    restart 1 2 3 5 4 6 7 8
-fi
diff --git a/winery-test-environment/ceph.yml b/winery-test-environment/ceph.yml
deleted file mode 100644
index d82ec52..0000000
--- a/winery-test-environment/ceph.yml
+++ /dev/null
@@ -1,93 +0,0 @@
-#
-# notes to install a client
-#  https://docs.ceph.com/en/latest/cephadm/client-setup/
-#  ceph config generate-minimal-conf > /etc/ceph/ceph.conf
-#  ceph auth get-or-create client.admin > /etc/ceph/ceph.keyring
-#
-- hosts: localhost
-  gather_facts: false
-
-  pre_tasks:
-
-    - name: keygen ceph_key
-      shell: |
-        mkdir -p context
-        ssh-keygen -f context/ceph_key -N '' -t rsa
-      args:
-        creates: context/ceph_key
-
-- hosts: all
-  become: true
-
-  pre_tasks:
-
-    - name: mkdir /root/.ssh
-      file:
-        path: /root/.ssh
-        state: directory
-        mode: 0700
-
-    - name: touch /root/.ssh/authorized_keys
-      file:
-        path: /root/.ssh/authorized_keys
-        state: touch
-
-    - name: add context/ceph_key.pub to /root/.ssh/authorized_keys
-      lineinfile:
-        path: /root/.ssh/authorized_keys
-        line: "{{ lookup('file', 'context/ceph_key.pub') }}"
-
-    - name: apt install
-      apt:
-        name:
-          - htop
-          - iotop
-          - iftop
-          - iperf
-
-- hosts: ceph
-  become: true
-
-  pre_tasks:
-
-    - name: apt install lvm2 curl gnupg2
-      apt:
-        name:
-          - lvm2
-          - curl
-          - gnupg2
-
-    - name: apt-key https://download.ceph.com/keys/release.asc
-      apt_key:
-        url: https://download.ceph.com/keys/release.asc
-
-    - name: add repository
-      apt_repository:
-        repo: "deb https://download.ceph.com/debian-pacific/ bullseye main"
-        filename: ceph
-
-    - name: apt install cephadm ceph-common
-      apt:
-        name:
-          - cephadm
-          - ceph-common
-
-  roles:
-    - geerlingguy.docker
-
-- hosts: all
-  become: true
-  # so that lineinfile does not race against itself
-  serial: 1
-
-  tasks:
-
-    - name: "add {{ inventory_hostname }} to /etc/hosts"
-      lineinfile:
-        path: /etc/hosts
-        line: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }} {{ inventory_hostname }}"
-      delegate_to: ceph1
-
-    - name: set hostname
-      hostname:
-        name: "{{ inventory_hostname }}"
diff --git a/winery-test-environment/fed4fire.rspec b/winery-test-environment/fed4fire.rspec
deleted file mode 100644
index 7758d18..0000000
--- a/winery-test-environment/fed4fire.rspec
+++ /dev/null
@@ -1,59 +0,0 @@
-<?xml version='1.0'?>
-<rspec xmlns="http://www.geni.net/resources/rspec/3" type="request" generated_by="jFed RSpec Editor" generated="2021-12-12T13:02:49.068+01:00" xmlns:emulab="http://www.protogeni.net/resources/rspec/ext/emulab/1" xmlns:delay="http://www.protogeni.net/resources/rspec/ext/delay/1" xmlns:jfed-command="http://jfed.iminds.be/rspec/ext/jfed-command/1" xmlns:client="http://www.protogeni.net/resources/rspec/ext/client/1" xmlns:jfed-ssh-keys="http://jfed.iminds.be/rspec/ext/jfed-ssh-keys/1" xmlns:jfed="http://jfed.iminds.be/rspec/ext/jfed/1" xmlns:sharedvlan="http://www.protogeni.net/resources/rspec/ext/shared-vlan/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.geni.net/resources/rspec/3 http://www.geni.net/resources/rspec/3/request.xsd ">
-  <node client_id="ceph1" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
-    <sliver_type name="raw-pc">
-      <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
-    </sliver_type>
-    <hardware_type name="dahu-grenoble"/>
-    <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="58.0" y="93.0"/>
-  </node>
-  <node client_id="ceph2" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
-    <sliver_type name="raw-pc">
-      <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
-    </sliver_type>
-    <hardware_type name="dahu-grenoble"/>
-    <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="158.0" y="93.0"/>
-  </node>
-  <node client_id="ceph3" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
-    <sliver_type name="raw-pc">
-      <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
-    </sliver_type>
-    <hardware_type name="dahu-grenoble"/>
-    <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="258.0" y="93.0"/>
-  </node>
-  <node client_id="ceph4" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
-    <sliver_type name="raw-pc">
-      <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
-    </sliver_type>
-    <hardware_type name="dahu-grenoble"/>
-    <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="358.0" y="93.0"/>
-  </node>
-  <node client_id="ceph5" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
-    <sliver_type name="raw-pc">
-      <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
-    </sliver_type>
-    <hardware_type name="dahu-grenoble"/>
-    <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="458.0" y="93.0"/>
-  </node>
-  <node client_id="ceph6" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
-    <sliver_type name="raw-pc">
-      <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
-    </sliver_type>
-    <hardware_type name="dahu-grenoble"/>
-    <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="58.0" y="193.0"/>
-  </node>
-  <node client_id="ceph7" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
-    <sliver_type name="raw-pc">
-      <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
-    </sliver_type>
-    <hardware_type name="dahu-grenoble"/>
-    <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="158.0" y="193.0"/>
-  </node>
-  <node client_id="ceph8" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
-    <sliver_type name="raw-pc">
-      <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
-    </sliver_type>
-    <hardware_type name="dahu-grenoble"/>
-    <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="258.0" y="193.0"/>
-  </node>
-</rspec>
\ No newline at end of file
diff --git a/winery-test-environment/fed4fire.sh b/winery-test-environment/fed4fire.sh
deleted file mode 100755
index a6a2792..0000000
--- a/winery-test-environment/fed4fire.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (C) 2021  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-set -e
-
-function context() {
-    local fed4fire=$1
-
-    if ! test "$fed4fire" ; then
-	return
-    fi
-
-    rm -fr ./context/fed4fire
-    mkdir -p ./context/fed4fire
-    cp $fed4fire ./context/fed4fire/fed4fire.zip
-    local here=$(pwd)
-    (
-	cd ./context/fed4fire
-	unzip fed4fire.zip
-	sed -i \
-	    -e 's|IdentityFile ./id_rsa$|IdentityFile '"${here}"'/context/cluster_key|' \
-	    -e "s|-F ssh-config|-F ${here}/context/ssh-config|" \
-	    ssh-config
-	cp ssh-config ..
-	mv id_rsa ../cluster_key
-	mv id_rsa.pub ../cluster_key.pub
-    )
-}
-
-ln -sf $(pwd)/grid5000.yml context/setup.yml
-
-context "$@"
diff --git a/winery-test-environment/grid5000.yml b/winery-test-environment/grid5000.yml
deleted file mode 100644
index b53203e..0000000
--- a/winery-test-environment/grid5000.yml
+++ /dev/null
@@ -1,81 +0,0 @@
-# https://www.grid5000.fr/w/Docker#Using_docker-cache.grid5000.fr
-
-- hosts: mon
-  gather_facts: no
-  become: true
-
-  tasks:
-
-    - name: Add the user 'debian'
-      user:
-        name: debian
-
-    - name: Allow 'debian' group to have passwordless sudo
-      lineinfile:
-        dest: /etc/sudoers
-        state: present
-        regexp: '^%debian'
-        line: '%debian ALL=(ALL) NOPASSWD: ALL'
-        validate: visudo -cf %s
-
-    - name: mkdir /home/debian/.ssh
-      file:
-        path: /home/debian/.ssh
-        state: directory
-        mode: 0700
-        owner: debian
-        group: debian
-
-
-    - name: copy authorized_keys to /home/debian
-      shell: |
-        cp /root/.ssh/authorized_keys /home/debian/.ssh/authorized_keys
-        chown debian:debian /home/debian/.ssh/authorized_keys
-        chmod 0600 /home/debian/.ssh/authorized_keys
-
-- hosts: osd
-  become: true
-
-  tasks:
-
-    # do that before lvm gets a chance to investigate and get the wrong idea
-    # about /dev/disk2 on grid5000 because there surely will be leftovers from
-    # whoever used the machine last
-    - name: clear leftovers from the disk to be used for OSDs
-      shell: |
-        dd if=/dev/zero of=/dev/disk2 count=100 bs=1024k
-        touch /etc/dd.done
-      args:
-        creates: /etc/dd.done
-
-- hosts: all
-  become: true
-
-  pre_tasks:
-
-    - name: mkdir /etc/docker
-      file:
-        path: /etc/docker
-        state: directory
-        mode: 755
-
-  roles:
-    - geerlingguy.docker
-
-  tasks:
-
-    - name: docker cache
-      copy:
-        content: |
-          {
-            "registry-mirrors": [
-              "http://docker-cache.grid5000.fr"
-            ],
-            "bip": "192.168.42.1/24"
-          }
-        dest: /etc/docker/daemon.json
-
-    - name: systemctl restart docker
-      service:
-        name: docker
-        state: restarted
diff --git a/winery-test-environment/inventory/group_vars/all/rw.yml b/winery-test-environment/inventory/group_vars/all/rw.yml
deleted file mode 100644
index e2f6a0d..0000000
--- a/winery-test-environment/inventory/group_vars/all/rw.yml
+++ /dev/null
@@ -1,5 +0,0 @@
----
-rw_disk1: /dev/vdb
-rw_disk2: /dev/vdc
-postgres_shared_buffers: 512MB
-postgres_effective_cache_size: 1GB
diff --git a/winery-test-environment/inventory/groups.yml b/winery-test-environment/inventory/groups.yml
deleted file mode 100644
index ca15a19..0000000
--- a/winery-test-environment/inventory/groups.yml
+++ /dev/null
@@ -1,13 +0,0 @@
----
-ceph:
-  children:
-    mon:
-    osd:
-
-mon:
-  hosts:
-    ceph1:
-
-rw:
-  hosts:
-    ceph2:
diff --git a/winery-test-environment/inventory/hosts.yml b/winery-test-environment/inventory/hosts.yml
deleted file mode 100644
index a28f861..0000000
--- a/winery-test-environment/inventory/hosts.yml
+++ /dev/null
@@ -1,10 +0,0 @@
-all:
-  hosts:
-    ceph1: {ansible_host: ceph1, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
-    ceph2: {ansible_host: ceph2, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
-    ceph3: {ansible_host: ceph3, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
-    ceph4: {ansible_host: ceph4, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
-    ceph5: {ansible_host: ceph5, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
-    ceph6: {ansible_host: ceph6, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
-    ceph7: {ansible_host: ceph7, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
-    ceph8: {ansible_host: ceph8, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
diff --git a/winery-test-environment/inventory/osd.yml b/winery-test-environment/inventory/osd.yml
deleted file mode 100644
index 7b8348c..0000000
--- a/winery-test-environment/inventory/osd.yml
+++ /dev/null
@@ -1,9 +0,0 @@
----
-osd:
-  hosts:
-    ceph3:
-    ceph4:
-    ceph5:
-    ceph6:
-    ceph7:
-    ceph8:
diff --git a/winery-test-environment/libvirt.yml b/winery-test-environment/libvirt.yml
deleted file mode 100644
index 8a17f1b..0000000
--- a/winery-test-environment/libvirt.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-# libvirt specific actions
-
-- hosts: mon
-  gather_facts: no
-  become: true
diff --git a/winery-test-environment/mitogen-strategy/README.txt b/winery-test-environment/mitogen-strategy/README.txt
deleted file mode 100644
index d2675a7..0000000
--- a/winery-test-environment/mitogen-strategy/README.txt
+++ /dev/null
@@ -1 +0,0 @@
-Copied from mitogen because of https://github.com/mitogen-hq/mitogen/issues/568
diff --git a/winery-test-environment/mitogen-strategy/__init__.py b/winery-test-environment/mitogen-strategy/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/winery-test-environment/mitogen-strategy/mitogen.py b/winery-test-environment/mitogen-strategy/mitogen.py
deleted file mode 100644
index 6687266..0000000
--- a/winery-test-environment/mitogen-strategy/mitogen.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2019, David Wilson
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors
-# may be used to endorse or promote products derived from this software without
-# specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-from __future__ import absolute_import
-import os.path
-import sys
-
-#
-# This is not the real Strategy implementation module, it simply exists as a
-# proxy to the real module, which is loaded using Python's regular import
-# mechanism, to prevent Ansible's PluginLoader from making up a fake name that
-# results in ansible_mitogen plugin modules being loaded twice: once by
-# PluginLoader with a name like "ansible.plugins.strategy.mitogen", which is
-# stuffed into sys.modules even though attempting to import it will trigger an
-# ImportError, and once under its canonical name, "ansible_mitogen.strategy".
-#
-# Therefore we have a proxy module that imports it under the real name, and
-# sets up the duff PluginLoader-imported module to just contain objects from
-# the real module, so duplicate types don't exist in memory, and things like
-# debuggers and isinstance() work predictably.
-#
-
-BASE_DIR = os.path.abspath(
-    os.path.join(os.path.dirname(__file__), '../../..')
-)
-
-if BASE_DIR not in sys.path:
-    sys.path.insert(0, BASE_DIR)
-
-import ansible_mitogen.strategy
-import ansible.plugins.strategy.linear
-
-
-class StrategyModule(ansible_mitogen.strategy.StrategyMixin,
-                     ansible.plugins.strategy.linear.StrategyModule):
-    pass
diff --git a/winery-test-environment/mitogen-strategy/mitogen_free.py b/winery-test-environment/mitogen-strategy/mitogen_free.py
deleted file mode 100644
index ffe2fbd..0000000
--- a/winery-test-environment/mitogen-strategy/mitogen_free.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright 2019, David Wilson
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors
-# may be used to endorse or promote products derived from this software without
-# specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-from __future__ import absolute_import
-import os.path
-import sys
-
-#
-# This is not the real Strategy implementation module, it simply exists as a
-# proxy to the real module, which is loaded using Python's regular import
-# mechanism, to prevent Ansible's PluginLoader from making up a fake name that
-# results in ansible_mitogen plugin modules being loaded twice: once by
-# PluginLoader with a name like "ansible.plugins.strategy.mitogen", which is
-# stuffed into sys.modules even though attempting to import it will trigger an
-# ImportError, and once under its canonical name, "ansible_mitogen.strategy".
-#
-# Therefore we have a proxy module that imports it under the real name, and
-# sets up the duff PluginLoader-imported module to just contain objects from
-# the real module, so duplicate types don't exist in memory, and things like
-# debuggers and isinstance() work predictably.
-#
-
-BASE_DIR = os.path.abspath(
-    os.path.join(os.path.dirname(__file__), '../../..')
-)
-
-if BASE_DIR not in sys.path:
-    sys.path.insert(0, BASE_DIR)
-
-import ansible_mitogen.loaders
-import ansible_mitogen.strategy
-
-
-Base = ansible_mitogen.loaders.strategy_loader.get('free', class_only=True)
-
-class StrategyModule(ansible_mitogen.strategy.StrategyMixin, Base):
-    pass
diff --git a/winery-test-environment/mitogen-strategy/mitogen_host_pinned.py b/winery-test-environment/mitogen-strategy/mitogen_host_pinned.py
deleted file mode 100644
index 23eccd3..0000000
--- a/winery-test-environment/mitogen-strategy/mitogen_host_pinned.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright 2019, David Wilson
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors
-# may be used to endorse or promote products derived from this software without
-# specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-from __future__ import absolute_import
-import os.path
-import sys
-
-#
-# This is not the real Strategy implementation module, it simply exists as a
-# proxy to the real module, which is loaded using Python's regular import
-# mechanism, to prevent Ansible's PluginLoader from making up a fake name that
-# results in ansible_mitogen plugin modules being loaded twice: once by
-# PluginLoader with a name like "ansible.plugins.strategy.mitogen", which is
-# stuffed into sys.modules even though attempting to import it will trigger an
-# ImportError, and once under its canonical name, "ansible_mitogen.strategy".
-#
-# Therefore we have a proxy module that imports it under the real name, and
-# sets up the duff PluginLoader-imported module to just contain objects from
-# the real module, so duplicate types don't exist in memory, and things like
-# debuggers and isinstance() work predictably.
-#
-
-BASE_DIR = os.path.abspath(
-    os.path.join(os.path.dirname(__file__), '../../..')
-)
-
-if BASE_DIR not in sys.path:
-    sys.path.insert(0, BASE_DIR)
-
-import ansible_mitogen.loaders
-import ansible_mitogen.strategy
-
-
-Base = ansible_mitogen.loaders.strategy_loader.get('host_pinned', class_only=True)
-
-if Base is None:
-    raise ImportError(
-        'The host_pinned strategy is only available in Ansible 2.7 or newer.'
-    )
-
-class StrategyModule(ansible_mitogen.strategy.StrategyMixin, Base):
-    pass
diff --git a/winery-test-environment/mitogen-strategy/mitogen_linear.py b/winery-test-environment/mitogen-strategy/mitogen_linear.py
deleted file mode 100644
index 1b198e6..0000000
--- a/winery-test-environment/mitogen-strategy/mitogen_linear.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Copyright 2019, David Wilson
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-# this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its contributors
-# may be used to endorse or promote products derived from this software without
-# specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-from __future__ import absolute_import
-import os.path
-import sys
-
-#
-# This is not the real Strategy implementation module, it simply exists as a
-# proxy to the real module, which is loaded using Python's regular import
-# mechanism, to prevent Ansible's PluginLoader from making up a fake name that
-# results in ansible_mitogen plugin modules being loaded twice: once by
-# PluginLoader with a name like "ansible.plugins.strategy.mitogen", which is
-# stuffed into sys.modules even though attempting to import it will trigger an
-# ImportError, and once under its canonical name, "ansible_mitogen.strategy".
-#
-# Therefore we have a proxy module that imports it under the real name, and
-# sets up the duff PluginLoader-imported module to just contain objects from
-# the real module, so duplicate types don't exist in memory, and things like
-# debuggers and isinstance() work predictably.
-#
-
-BASE_DIR = os.path.abspath(
-    os.path.join(os.path.dirname(__file__), '../../..')
-)
-
-if BASE_DIR not in sys.path:
-    sys.path.insert(0, BASE_DIR)
-
-import ansible_mitogen.loaders
-import ansible_mitogen.strategy
-
-
-Base = ansible_mitogen.loaders.strategy_loader.get('linear', class_only=True)
-
-class StrategyModule(ansible_mitogen.strategy.StrategyMixin, Base):
-    pass
diff --git a/winery-test-environment/osd.yml b/winery-test-environment/osd.yml
deleted file mode 100644
index 8c5e32f..0000000
--- a/winery-test-environment/osd.yml
+++ /dev/null
@@ -1,40 +0,0 @@
----
-- hosts: osd
-  gather_facts: no
-  become: true
-
-  tasks:
-
-    - name: add host
-      shell: |
-        ceph orch host add {{ inventory_hostname }}
-      delegate_to: ceph1
-
-- hosts: osd
-  gather_facts: no
-  become: true
-
-  tasks:
-
-    - name: wait for host
-      shell: |
-        ceph orch host ls | grep '^{{ inventory_hostname }} '
-      delegate_to: ceph1
-      register: host
-      until: host is success
-      retries: 30
-      delay: 5
-
-- hosts: osd
-  gather_facts: no
-  become: true
-
-  tasks:
-
-    # the desired side effect here is twofold
-    # * device zap blocks until the osd daemon is ready on the target host
-    # * on grid5000 /dev/disk2 needs to be applied
-    - name: zap /dev/disk2
-      shell: |
-        ceph orch device zap {{ inventory_hostname }} /dev/disk2 --force || true
-      delegate_to: ceph1
diff --git a/winery-test-environment/remote-tox.sh b/winery-test-environment/remote-tox.sh
deleted file mode 100755
index 63581ac..0000000
--- a/winery-test-environment/remote-tox.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-# Copyright (C) 2021  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-set -ex
-
-DIR=winery-test-environment
-SSH="ssh -i ${DIR}/context/cluster_key -F ${DIR}/context/ssh-config"
-
-function sanity_check() {
-    if ! test -f ${DIR}/context/cluster_key ; then
-	echo "${DIR}/context/cluster_key does not exist"
-	echo "check ${DIR}/README.md for instructions."
-	return 1
-    fi
-}
-
-function copy_to() {
-    RSYNC_RSH="$SSH" rsync -av --exclude=.mypy_cache --exclude=.coverage --exclude=.eggs --exclude=swh.objstorage.egg-info --exclude=winery-test-environment/context --exclude=.tox --exclude='*~' --exclude=__pycache__ --exclude='*.py[co]' $(git rev-parse --show-toplevel)/ debian@ceph1:/home/debian/swh-objstorage/
-}
-
-function copy_from() {
-    RSYNC_RSH="$SSH" rsync -av --delete debian@ceph1:/tmp/winery/ ${DIR}/context/stats/
-}
-
-function render() {
-    python ${DIR}/render-stats.py ${DIR}/context/stats/
-}
-
-function run() {
-    sanity_check || return 1
-
-    copy_to || return 1
-
-    $SSH -t debian@ceph1 bash -c "'cd swh-objstorage ; ../venv/bin/tox run -e py3 -- -k test_winery $*'" || return 1
-
-    copy_from || return 1
-
-    render || return 1
-}
-
-run "$@"
diff --git a/winery-test-environment/render-stats.py b/winery-test-environment/render-stats.py
deleted file mode 100644
index 6da9dcc..0000000
--- a/winery-test-environment/render-stats.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (C) 2022  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import os
-import sys
-
-from matplotlib import pyplot as plt
-from matplotlib.ticker import FormatStrFormatter
-import pandas as pd
-
-
-def human(size, unit):
-    if size < 1024:
-        return f"{int(size)} {unit}/s"
-    elif size / 1024 < 1024:
-        return f"{round(size/1024, 1)} K{unit}/s"
-    elif size / (1024 * 1024) < 1024:
-        return f"{round(size / (1024 * 1024), 1)} M{unit}/s"
-    elif size / (1024 * 1024 * 1024) < 1024:
-        return f"{round(size / (1024 * 1024 * 1024), 1)} G{unit}/s"
-
-
-def read_stats(stats):
-    dfs = []
-    files = os.listdir(stats)
-    for file in files:
-        f = f"{stats}/{file}"
-        if not os.path.isfile(f):
-            continue
-        dfs.append(pd.read_csv(f))
-    df = pd.concat(dfs)
-    df.set_index("time")
-    return df.sort_values(by=["time"])
-
-
-def main(stats):
-    df = read_stats(stats)
-    print(df)
-    t = df["time"].to_numpy()
-    sec = t[-1] - t[0]
-    a = df.sum() / sec
-    print(f'Bytes write   {human(a["bytes_write"], "B")}')
-    print(f'Objects write {human(a["object_write_count"], "object")}')
-    print(f'Bytes read    {human(a["bytes_read"], "B")}')
-    print(f'Objects read  {human(a["object_read_count"], "object")}')
-
-    df["date"] = pd.to_datetime(df["time"], unit="s")
-
-    p = df.plot(x="time", y=["bytes_write", "bytes_read"])
-    p.set_xlabel("Time")
-    p.yaxis.set_major_formatter(FormatStrFormatter("%.0f"))
-    p.set_ylabel("B/s")
-    plt.show()
-
-
-if __name__ == "__main__":
-    main(sys.argv[1])
diff --git a/winery-test-environment/requirements.txt b/winery-test-environment/requirements.txt
deleted file mode 100644
index 00b43ed..0000000
--- a/winery-test-environment/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-ansible
-mitogen
-pandas
-matplotlib
-PyQt5
diff --git a/winery-test-environment/rng.xml b/winery-test-environment/rng.xml
deleted file mode 100644
index 6ee1641..0000000
--- a/winery-test-environment/rng.xml
+++ /dev/null
@@ -1,5 +0,0 @@
-    <rng model='virtio'>
-      <backend model='random'>/dev/urandom</backend>
-      <alias name='rng0'/>
-      <address type='pci' domain='0x0000' bus='0x06' slot='0x00' function='0x0'/>
-    </rng>
diff --git a/winery-test-environment/rw.yml b/winery-test-environment/rw.yml
deleted file mode 100644
index 8cfdade..0000000
--- a/winery-test-environment/rw.yml
+++ /dev/null
@@ -1,111 +0,0 @@
----
-- name: install and configure Read Write Storage
-  hosts: rw
-  become: true
-
-  pre_tasks:
-
-    - name: zap attached disks
-      shell: |
-        for disk in {{ rw_disk1 }} {{ rw_disk2 }} ; do
-          dd if=/dev/zero of=$disk count=100 bs=1024k
-        done
-        touch /etc/zapped.done
-      args:
-        creates: /etc/zapped.done
-
-    - name: apt install lvm2
-      apt:
-        name:
-          - lvm2
-
-    - name: vgcreate pg
-      lvg:
-        vg: pg
-        pvs: "{{ rw_disk1 }},{{ rw_disk2 }}"
-
-    - name: lvcreate pg
-      lvol:
-        vg: pg
-        lv: pg
-        size: +100%FREE
-
-    - name: mkfs /dev/mapper/pg-pg
-      filesystem:
-        fstype: ext4
-#        force: yes
-        dev: /dev/mapper/pg-pg
-
-    - name: mkdir /var/lib/postgresql
-      file:
-        path: /var/lib/postgresql
-        state: directory
-        mode: 755
-
-    - name: mount /var/lib/postgresql
-      mount:
-        path: /var/lib/postgresql
-        src: /dev/mapper/pg-pg
-        fstype: ext4
-        state: mounted
-
-    - name: apt install postgres
-      apt:
-        name:
-          - postgresql
-          - postgresql-contrib
-          - libpq-dev
-          - python3-psycopg
-          - python3-psycopg_pool
-          - acl
-
-    - name: postgresql.conf max_connections = 1000
-      lineinfile:
-        path: /etc/postgresql/13/main/postgresql.conf
-        regexp: '^max_connections'
-        line: "max_connections = 1000"
-
-    #
-    # https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server
-    #
-    - name: postgresql.conf shared_buffers
-      lineinfile:
-        path: /etc/postgresql/13/main/postgresql.conf
-        regexp: '^shared_buffers'
-        # 1/4 RAM
-        line: "shared_buffers = {{ postgres_shared_buffers }}"
-
-    - name: postgresql.conf effective_cache_size
-      lineinfile:
-        path: /etc/postgresql/13/main/postgresql.conf
-        regexp: '.*effective_cache_size'
-        # 1/2 RAM
-        line: "effective_cache_size = {{ postgres_effective_cache_size }}"
-
-    - name: postgresql.conf random_page_cost
-      lineinfile:
-        path: /etc/postgresql/13/main/postgresql.conf
-        regexp: '.*random_page_cost'
-        line: "random_page_cost = 2.0"
-
-    - name: listen on *
-      lineinfile:
-        path: /etc/postgresql/13/main/postgresql.conf
-        line: "listen_addresses = '*'"
-
-    - name: allow all connections
-      lineinfile:
-        path: /etc/postgresql/13/main/pg_hba.conf
-        line: "host all all 0.0.0.0/0 trust"
-
-    - name: systemctl restart postgresql
-      service:
-        name: postgresql
-        state: restarted
-
-    - name: pg user testuser/testpassword
-      postgresql_user:
-        name: testuser
-        password: testpassword
-        role_attr_flags: SUPERUSER
-      become_user: postgres
diff --git a/winery-test-environment/tests.yml b/winery-test-environment/tests.yml
deleted file mode 100644
index 67e634f..0000000
--- a/winery-test-environment/tests.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-- name: install test environment
-  gather_facts: no
-  hosts: mon
-
-  pre_tasks:
-
-    - name: apt install
-      apt:
-        name:
-          - emacs-nox
-          - gcc
-          - libcap-dev
-          - libcmph-dev
-          - libpq-dev
-          - postgresql-client-common
-          - postgresql-13
-          - python3-pip
-          - python3-rbd
-          - rsync
-          - tmux
-          - virtualenv
-      become: true
-
-    - name: configure venv
-      shell: |
-        virtualenv venv
-        venv/bin/pip3 install tox
-      args:
-        creates: venv
-        chdir: /home/debian
-      become: true
-      become_user: debian
-- 
GitLab


From 54c2a5ecf8bcc84a81ce30098be4c3b4038150df Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Thu, 27 Feb 2025 14:31:58 +0100
Subject: [PATCH 03/25] fix typo in readonly proxy exception

---
 swh/objstorage/proxies/readonly.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swh/objstorage/proxies/readonly.py b/swh/objstorage/proxies/readonly.py
index 0b92a84..0139a51 100644
--- a/swh/objstorage/proxies/readonly.py
+++ b/swh/objstorage/proxies/readonly.py
@@ -65,4 +65,4 @@ class ReadOnlyProxyObjStorage(ObjStorage):
         raise ReadOnlyObjStorageError("restore")
 
     def delete(self, *args, **kwargs):
-        raise ReadOnlyObjStorageError("dalete")
+        raise ReadOnlyObjStorageError("delete")
-- 
GitLab


From b8050b517796238f5c32c7f4269731108e0dcfd7 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Thu, 27 Feb 2025 14:32:23 +0100
Subject: [PATCH 04/25] winery: explain why ObjNotFoundError gets re-raised

---
 swh/objstorage/backends/winery/objstorage.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 1aab1c4..03ca9c0 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -42,8 +42,9 @@ class WineryObjStorage(ObjStorage):
     def get(self, obj_id: ObjId) -> bytes:
         try:
             return self.winery.get(self._hash(obj_id))
-        except ObjNotFoundError:
-            raise ObjNotFoundError(obj_id)
+        except ObjNotFoundError as exc:
+            # re-raise exception with the passed obj_id instead of the internal winery obj_id.
+            raise ObjNotFoundError(obj_id) from exc
 
     def check_config(self, *, check_write: bool) -> bool:
         return True
-- 
GitLab


From aa5b36551e62176f1bc88aecb02ad42379a05e28 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Thu, 27 Feb 2025 14:33:05 +0100
Subject: [PATCH 05/25] winery: raise ReadOnlyObjStorageError when appropriate

---
 swh/objstorage/backends/winery/objstorage.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 03ca9c0..61e348a 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -9,7 +9,7 @@ from multiprocessing import Process
 from typing import Callable, Iterator, List, Optional, Tuple
 
 from swh.objstorage.constants import DEFAULT_LIMIT
-from swh.objstorage.exc import ObjNotFoundError
+from swh.objstorage.exc import ObjNotFoundError, ReadOnlyObjStorageError
 from swh.objstorage.interface import CompositeObjId, ObjId
 from swh.objstorage.objstorage import ObjStorage, timed
 
@@ -55,9 +55,13 @@ class WineryObjStorage(ObjStorage):
 
     @timed
     def add(self, content: bytes, obj_id: ObjId, check_presence: bool = True) -> None:
+        if not isinstance(self.winery, WineryWriter):
+            raise ReadOnlyObjStorageError("add")
         self.winery.add(content, self._hash(obj_id), check_presence)
 
     def delete(self, obj_id: ObjId):
+        if not isinstance(self.winery, WineryWriter):
+            raise ReadOnlyObjStorageError("delete")
         if not self.allow_delete:
             raise PermissionError("Delete is not allowed.")
         return self.winery.delete(self._hash(obj_id))
-- 
GitLab


From 1a7e444b83ad045d25123e0fbbc2e5c0961150a6 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Thu, 27 Feb 2025 19:18:59 +0100
Subject: [PATCH 06/25] winery: group settings into a more sensible hierarchy

This should allow future refactorings to pass each component only the
settings it actually needs, rather than having every component accept and
ignore arbitrary kwargs. It should also make the transition towards a
configurable shards pool backend more tractable.
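
For illustration, a winery objstorage configured with the new grouped
settings could be instantiated along the following lines. This is only a
sketch; the DSN, sizes and rates are placeholder values, not defaults:

    from swh.objstorage.backends.winery.objstorage import WineryObjStorage

    # Each group maps to one of the TypedDicts defined in settings.py;
    # defaults for omitted keys are filled in by populate_default_settings().
    storage = WineryObjStorage(
        database={"db": "dbname=winery", "application_name": "winery-rw"},
        shards={"max_size": 100 * 1024 * 1024 * 1024},
        shards_pool={"type": "rbd", "pool_name": "shards"},
        throttler={"max_read_bps": 100_000_000, "max_write_bps": 100_000_000},
        packer={"pack_immediately": True},
    )

The same top-level keys (database, shards, shards_pool, throttler, packer)
are the ones picked up from the objstorage configuration by the CLI.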
---
 swh/objstorage/backends/winery/objstorage.py  | 129 ++++++++-------
 swh/objstorage/backends/winery/roshard.py     |   7 +-
 swh/objstorage/backends/winery/settings.py    | 150 ++++++++++++++++++
 swh/objstorage/cli.py                         | 115 +++++---------
 .../tests/test_objstorage_winery.py           | 149 +++++++++--------
 .../tests/winery_testing_helpers.py           |   6 +-
 6 files changed, 336 insertions(+), 220 deletions(-)
 create mode 100644 swh/objstorage/backends/winery/settings.py

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 61e348a..6a3b505 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -6,20 +6,15 @@
 from functools import partial
 import logging
 from multiprocessing import Process
-from typing import Callable, Iterator, List, Optional, Tuple
+from typing import Callable, Iterator, List, Optional
 
 from swh.objstorage.constants import DEFAULT_LIMIT
 from swh.objstorage.exc import ObjNotFoundError, ReadOnlyObjStorageError
 from swh.objstorage.interface import CompositeObjId, ObjId
 from swh.objstorage.objstorage import ObjStorage, timed
 
-from .roshard import (
-    DEFAULT_IMAGE_FEATURES_UNSUPPORTED,
-    Pool,
-    ROShard,
-    ROShardCreator,
-    ShardNotMapped,
-)
+from . import settings
+from .roshard import Pool, ROShard, ROShardCreator, ShardNotMapped
 from .rwshard import RWShard
 from .sharedbase import ShardState, SharedBase
 from .sleep import sleep_exponential
@@ -31,12 +26,33 @@ class WineryObjStorage(ObjStorage):
     PRIMARY_HASH = "sha256"
     name: str = "winery"
 
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        if kwargs.get("readonly"):
-            self.winery = WineryReader(**kwargs)
+    def __init__(
+        self,
+        database: settings.Database,
+        shards: settings.Shards,
+        shards_pool: settings.ShardsPool,
+        throttler: settings.Throttler,
+        packer: Optional[settings.Packer] = None,
+        readonly: bool = False,
+        allow_delete: bool = False,
+        name: str = "winery",
+    ) -> None:
+        super().__init__(allow_delete=allow_delete, name=name)
+
+        self.settings, legacy_kwargs = settings.populate_default_settings(
+            database=database,
+            shards=shards,
+            shards_pool=shards_pool,
+            throttler=throttler,
+            packer=(packer or {}),
+        )
+
+        self.winery: WineryReader | WineryWriter
+
+        if readonly:
+            self.winery = WineryReader(**legacy_kwargs)
         else:
-            self.winery = WineryWriter(**kwargs)
+            self.winery = WineryWriter(**legacy_kwargs)
 
     @timed
     def get(self, obj_id: ObjId) -> bytes:
@@ -325,25 +341,11 @@ def stop_after_shards(max_shards_packed: int) -> Callable[[int], bool]:
 
 
 def shard_packer(
-    base_dsn: str,
-    shard_max_size: int,
-    throttle_read: int,
-    throttle_write: int,
-    application_name: Optional[str] = None,
-    rbd_pool_name: str = "shards",
-    rbd_data_pool_name: Optional[str] = None,
-    rbd_image_features_unsupported: Tuple[
-        str, ...
-    ] = DEFAULT_IMAGE_FEATURES_UNSUPPORTED,
-    rbd_use_sudo: bool = True,
-    rbd_map_options: str = "",
-    rbd_create_images: bool = True,
-    rbd_wait_for_image: Callable[[int], None] = sleep_exponential(
-        min_duration=5,
-        factor=2,
-        max_duration=60,
-        message="Waiting for RBD image mapping",
-    ),
+    database: settings.Database,
+    shards: settings.Shards,
+    shards_pool: settings.ShardsPool,
+    throttler: settings.Throttler,
+    packer: Optional[settings.Packer] = None,
     stop_packing: Callable[[int], bool] = never_stop,
     wait_for_shard: Callable[[int], None] = sleep_exponential(
         min_duration=5,
@@ -357,21 +359,31 @@ def shard_packer(
     When no shards are available for packing, call the `wait_for_shard` function.
 
     Arguments:
-      base_dsn: PostgreSQL dsn for the shared database
-      shard_max_size: Max size of a shard (used to size new shards)
-      throttle_read: reads per second
-      throttle_write: writes per second
-      application_name: the application name sent to PostgreSQL
-      rbd_create_images: create images directly (or wait for RBD mapper)
-      rbd_wait_for_image: sleep function called to wait for an image (when
-       `rbd_create_images`=`False`)
-      rbd_*: passed directly to :class:`roshard.Pool`
+      database: database settings (e.g. db connection string)
+      shards: shards settings (e.g. max_size)
+      shards_pool: shards pool settings (e.g. Ceph RBD settings)
+      throttler: throttler settings
+      packer: packer settings
       stop_packing: callback to determine whether the packer should exit
       wait_for_shard: sleep function called when no shards are available to be packed
     """
-    application_name = application_name or "Winery Shard Packer"
 
-    base = SharedBase(base_dsn=base_dsn, application_name=application_name)
+    all_settings, legacy_kwargs = settings.populate_default_settings(
+        database=database,
+        shards=shards,
+        shards_pool=shards_pool,
+        throttler=throttler,
+        packer=(packer or {}),
+    )
+
+    application_name = (
+        all_settings["database"]["application_name"] or "Winery Shard Packer"
+    )
+
+    base = SharedBase(
+        base_dsn=all_settings["database"]["db"],
+        application_name=application_name,
+    )
 
     shards_packed = 0
     waited_for_shards = 0
@@ -389,22 +401,7 @@ def shard_packer(
 
         with locked:
             logger.info("shard_packer: Locked shard %s to pack", locked.name)
-            ret = pack(
-                locked.name,
-                base_dsn=base_dsn,
-                shard_max_size=shard_max_size,
-                shared_base=base,
-                throttle_read=throttle_read,
-                throttle_write=throttle_write,
-                application_name=application_name,
-                rbd_use_sudo=rbd_use_sudo,
-                rbd_map_options=rbd_map_options,
-                rbd_create_images=rbd_create_images,
-                rbd_wait_for_image=rbd_wait_for_image,
-                rbd_pool_name=rbd_pool_name,
-                rbd_data_pool_name=rbd_data_pool_name,
-                rbd_image_features_unsupported=rbd_image_features_unsupported,
-            )
+            ret = pack(locked.name, shared_base=base, **legacy_kwargs)
             if not ret:
                 raise ValueError("Packing shard %s failed" % locked.name)
             shards_packed += 1
@@ -413,9 +410,8 @@ def shard_packer(
 
 
 def rw_shard_cleaner(
-    base_dsn: str,
+    database: settings.Database,
     min_mapped_hosts: int,
-    application_name: Optional[str] = None,
     stop_cleaning: Callable[[int], bool] = never_stop,
     wait_for_shard: Callable[[int], None] = sleep_exponential(
         min_duration=5,
@@ -429,15 +425,15 @@ def rw_shard_cleaner(
     When no shards are available for packing, call the `wait_for_shard` function.
 
     Arguments:
-      base_dsn: PostgreSQL dsn for the shared database
+      database: database settings (e.g. db connection string)
       min_mapped_hosts: how many hosts should have mapped the image read-only before
         cleaning it
-      application_name: the application name sent to PostgreSQL
       stop_cleaning: callback to determine whether the cleaner should exit
       wait_for_shard: sleep function called when no shards are available to be cleaned
     """
-    application_name = application_name or "Winery RW shard cleaner"
-    base = SharedBase(base_dsn=base_dsn, application_name=application_name)
+    database = settings.database_settings_with_defaults(database)
+
+    base = SharedBase(base_dsn=database["db"])
 
     shards_cleaned = 0
     waited_for_shards = 0
@@ -460,9 +456,8 @@ def rw_shard_cleaner(
 
             ret = cleanup_rw_shard(
                 locked.name,
-                base_dsn=base_dsn,
+                base_dsn=database["db"],
                 shared_base=base,
-                application_name=application_name,
             )
             if not ret:
                 raise ValueError("Cleaning shard %s failed" % locked.name)
diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index 6ffc838..e762d8a 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -19,16 +19,13 @@ from systemd.daemon import notify
 
 from swh.perfecthash import Shard, ShardCreator
 
+from . import settings
 from .sharedbase import ShardState, SharedBase
 from .sleep import sleep_exponential
 from .throttler import Throttler
 
 logger = logging.getLogger(__name__)
 
-# This would be used for image features that are not supported by the kernel RBD
-# driver, e.g. exclusive-lock, object-map and fast-diff for kernels < 5.3
-DEFAULT_IMAGE_FEATURES_UNSUPPORTED: Tuple[str, ...] = ()
-
 
 class ShardNotMapped(Exception):
     pass
@@ -57,7 +54,7 @@ class Pool(object):
         rbd_data_pool_name: Optional[str] = None,
         rbd_image_features_unsupported: Tuple[
             str, ...
-        ] = DEFAULT_IMAGE_FEATURES_UNSUPPORTED,
+        ] = settings.DEFAULT_IMAGE_FEATURES_UNSUPPORTED,
         rbd_map_options: str = "",
     ) -> None:
         self.use_sudo = rbd_use_sudo
diff --git a/swh/objstorage/backends/winery/settings.py b/swh/objstorage/backends/winery/settings.py
new file mode 100644
index 0000000..2004475
--- /dev/null
+++ b/swh/objstorage/backends/winery/settings.py
@@ -0,0 +1,150 @@
+# Copyright (C) 2025  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Any, Dict, Literal, NotRequired, Optional, Tuple, TypedDict
+
+# This would be used for image features that are not supported by the kernel RBD
+# driver, e.g. exclusive-lock, object-map and fast-diff for kernels < 5.3
+DEFAULT_IMAGE_FEATURES_UNSUPPORTED: Tuple[str, ...] = ()
+
+
+class Packer(TypedDict):
+    """Settings for the packer process, either external or internal"""
+
+    pack_immediately: NotRequired[bool]
+    """Immediately pack shards (in a separate thread) when overflowing"""
+    clean_immediately: NotRequired[bool]
+    """Immediately clean shards when packing is complete"""
+
+
+def packer_settings_with_defaults(values: Packer) -> Packer:
+    """Hydrate Packer settings with default values"""
+    return {"pack_immediately": True, "clean_immediately": True, **values}
+
+
+class Shards(TypedDict):
+    """Settings for shard management"""
+
+    max_size: int
+    """Maximum cumulative size of objects in a shard"""
+    rw_idle_timeout: NotRequired[float]
+    """Timeout (seconds) after which write shards get released when idle"""
+
+
+def shards_settings_with_defaults(values: Shards) -> Shards:
+    """Hydrate Shards settings with default values"""
+    return {"rw_idle_timeout": 300, **values}
+
+
+class ShardsPool(TypedDict):
+    """Settings for the Shards pool"""
+
+    type: Literal["rbd"]
+
+
+class RbdShardsPool(TypedDict):
+    """Settings for the Ceph RBD-based Shards pool"""
+
+    type: Literal["rbd"]
+    use_sudo: NotRequired[bool]
+    map_options: NotRequired[str]
+    pool_name: NotRequired[str]
+    data_pool_name: NotRequired[Optional[str]]
+    image_features_unsupported: NotRequired[Tuple[str, ...]]
+
+
+def rbd_shards_pool_settings_with_defaults(
+    values: ShardsPool,
+) -> RbdShardsPool:
+    """Hydrate RbdShards settings with default values"""
+    return {
+        "type": "rbd",
+        "use_sudo": True,
+        "pool_name": "shards",
+        "data_pool_name": None,
+        "image_features_unsupported": DEFAULT_IMAGE_FEATURES_UNSUPPORTED,
+        "map_options": "",
+        **values,
+    }
+
+
+class Throttler(TypedDict):
+    """Settings for the winery throttler"""
+
+    max_read_bps: int
+    max_write_bps: int
+
+
+class Database(TypedDict):
+    """Settings for the winery database"""
+
+    db: str
+    """Database connection string"""
+    application_name: NotRequired[Optional[str]]
+    """Application name for the database connection"""
+
+
+def database_settings_with_defaults(values: Database) -> Database:
+    """Hydrate Database settings with defaults"""
+    return {"application_name": None, **values}
+
+
+class Winery(TypedDict, total=False):
+    """A representation of all available winery settings"""
+
+    database: Database
+    shards: Shards
+    shards_pool: RbdShardsPool
+    throttler: Throttler
+    packer: Packer
+
+
+SETTINGS = frozenset({"database", "shards", "shards_pool", "throttler", "packer"})
+
+
+def populate_default_settings(
+    database: Optional[Database] = None,
+    shards: Optional[Shards] = None,
+    shards_pool: Optional[ShardsPool] = None,
+    throttler: Optional[Throttler] = None,
+    packer: Optional[Packer] = None,
+) -> Tuple[Winery, Dict[str, Any]]:
+    """Given some settings for a Winery objstorage, add all the appropriate
+    default settings."""
+    settings: Winery = {}
+    legacy_kwargs: Dict[str, Any] = {}
+
+    if database is not None:
+        database = database_settings_with_defaults(database)
+        settings["database"] = database
+        legacy_kwargs["base_dsn"] = database["db"]
+        legacy_kwargs["application_name"] = database["application_name"]
+
+    if shards is not None:
+        shards = shards_settings_with_defaults(shards)
+        settings["shards"] = shards
+        legacy_kwargs["shard_max_size"] = shards["max_size"]
+        legacy_kwargs["rwshard_idle_timeout"] = shards["rw_idle_timeout"]
+
+    if shards_pool is not None:
+        if shards_pool["type"] == "rbd":
+            shards_pool = rbd_shards_pool_settings_with_defaults(shards_pool)
+            settings["shards_pool"] = shards_pool
+            for k, v in shards_pool.items():
+                legacy_kwargs[f"rbd_{k}"] = v
+        else:
+            raise ValueError(f"Unknown shards pool type: {shards_pool['type']}")
+
+    if throttler is not None:
+        settings["throttler"] = throttler
+        legacy_kwargs["throttle_read"] = throttler["max_read_bps"]
+        legacy_kwargs["throttle_write"] = throttler["max_write_bps"]
+
+    if packer is not None:
+        packer = packer_settings_with_defaults(packer)
+        settings["packer"] = packer
+        legacy_kwargs.update(packer)
+
+    return settings, legacy_kwargs
diff --git a/swh/objstorage/cli.py b/swh/objstorage/cli.py
index 7e0f7da..7ab915c 100644
--- a/swh/objstorage/cli.py
+++ b/swh/objstorage/cli.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2015-2020  The Software Heritage developers
+# Copyright (C) 2015-2025  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -102,6 +102,15 @@ def winery(ctx):
     if config["cls"] != "winery":
         raise click.ClickException("winery packer only works on a winery objstorage")
 
+    from swh.objstorage.backends.winery.settings import (
+        SETTINGS,
+        populate_default_settings,
+    )
+
+    ctx.obj["winery_settings"], ctx.obj["winery_legacy_kwargs"] = (
+        populate_default_settings(**{k: v for k, v in config.items() if k in SETTINGS})
+    )
+
 
 @winery.command("packer")
 @click.option("--stop-after-shards", type=click.INT, default=None)
@@ -111,11 +120,8 @@ def winery_packer(ctx, stop_after_shards: Optional[int] = None):
     import signal
 
     from swh.objstorage.backends.winery.objstorage import shard_packer
-    from swh.objstorage.backends.winery.roshard import (
-        DEFAULT_IMAGE_FEATURES_UNSUPPORTED,
-    )
 
-    config = ctx.obj["config"]["objstorage"]
+    settings = ctx.obj["winery_settings"]
 
     signal_received = False
 
@@ -130,53 +136,28 @@ def winery_packer(ctx, stop_after_shards: Optional[int] = None):
         logger.warning("Received signal %s, exiting", signal.strsignal(signum))
         signal_received = True
 
-    base_dsn = config["base_dsn"]
-    shard_max_size = config["shard_max_size"]
-    throttle_read = config.get("throttle_read", 200 * 1024 * 1024)
-    throttle_write = config.get("throttle_write", 200 * 1024 * 1024)
-    rbd_pool_name = config.get("rbd_pool_name", "shards")
-    rbd_data_pool_name = config.get("rbd_data_pool_name")
-    rbd_use_sudo = config.get("rbd_use_sudo", True)
-    rbd_image_features_unsupported = tuple(
-        config.get("rbd_image_features_unsupported", DEFAULT_IMAGE_FEATURES_UNSUPPORTED)
-    )
-    rbd_create_images = config.get("rbd_create_images", True)
-    rbd_map_options = config.get("rbd_map_options", "")
-
     signal.signal(signal.SIGINT, set_signal_received)
     signal.signal(signal.SIGTERM, set_signal_received)
 
-    ret = shard_packer(
-        base_dsn=base_dsn,
-        shard_max_size=shard_max_size,
-        throttle_read=throttle_read,
-        throttle_write=throttle_write,
-        rbd_pool_name=rbd_pool_name,
-        rbd_data_pool_name=rbd_data_pool_name,
-        rbd_image_features_unsupported=rbd_image_features_unsupported,
-        rbd_use_sudo=rbd_use_sudo,
-        rbd_map_options=rbd_map_options,
-        rbd_create_images=rbd_create_images,
-        stop_packing=stop_packing,
-    )
+    ret = shard_packer(**settings, stop_packing=stop_packing)
 
     logger.info("Packed %s shards", ret)
 
 
 @winery.command("rbd")
 @click.option("--stop-instead-of-waiting", is_flag=True)
+@click.option("--manage-rw-images", is_flag=True)
 @click.pass_context
-def winery_rbd(ctx, stop_instead_of_waiting: bool = False):
+def winery_rbd(
+    ctx, stop_instead_of_waiting: bool = False, manage_rw_images: bool = False
+):
     """Run a winery RBD image manager process"""
     import signal
 
-    from swh.objstorage.backends.winery.roshard import (
-        DEFAULT_IMAGE_FEATURES_UNSUPPORTED,
-        Pool,
-    )
+    from swh.objstorage.backends.winery.roshard import Pool
     from swh.objstorage.backends.winery.sleep import sleep_exponential
 
-    config = ctx.obj["config"]["objstorage"]
+    legacy_kwargs = ctx.obj["winery_legacy_kwargs"]
 
     stop_on_next_iteration = False
 
@@ -202,32 +183,21 @@ def winery_rbd(ctx, stop_instead_of_waiting: bool = False):
         logger.warning("Received signal %s, exiting", signal.strsignal(signum))
         stop_on_next_iteration = True
 
-    base_dsn = config["base_dsn"]
-    shard_max_size = config["shard_max_size"]
-    rbd_pool_name = config.get("rbd_pool_name", "shards")
-    rbd_data_pool_name = config.get("rbd_data_pool_name")
-    rbd_use_sudo = config.get("rbd_use_sudo", True)
-    rbd_image_features_unsupported = tuple(
-        config.get("rbd_image_features_unsupported", DEFAULT_IMAGE_FEATURES_UNSUPPORTED)
-    )
-    rbd_manage_rw_images = config.get("rbd_manage_rw_images", True)
-    rbd_map_options = config.get("rbd_map_options", "")
-
     signal.signal(signal.SIGINT, set_signal_received)
     signal.signal(signal.SIGTERM, set_signal_received)
 
     pool = Pool(
-        shard_max_size=shard_max_size,
-        rbd_pool_name=rbd_pool_name,
-        rbd_data_pool_name=rbd_data_pool_name,
-        rbd_use_sudo=rbd_use_sudo,
-        rbd_image_features_unsupported=rbd_image_features_unsupported,
-        rbd_map_options=rbd_map_options,
+        shard_max_size=legacy_kwargs["shard_max_size"],
+        rbd_pool_name=legacy_kwargs["rbd_pool_name"],
+        rbd_data_pool_name=legacy_kwargs["rbd_data_pool_name"],
+        rbd_use_sudo=legacy_kwargs["rbd_use_sudo"],
+        rbd_image_features_unsupported=legacy_kwargs["rbd_image_features_unsupported"],
+        rbd_map_options=legacy_kwargs["rbd_map_options"],
     )
 
     pool.manage_images(
-        base_dsn=base_dsn,
-        manage_rw_images=rbd_manage_rw_images,
+        base_dsn=legacy_kwargs["base_dsn"],
+        manage_rw_images=manage_rw_images,
         wait_for_image=wait_for_image,
         stop_running=stop_running,
     )
@@ -257,7 +227,7 @@ def winery_rw_shard_cleaner(
     from swh.objstorage.backends.winery.objstorage import rw_shard_cleaner
     from swh.objstorage.backends.winery.sleep import sleep_exponential
 
-    config = ctx.obj["config"]["objstorage"]
+    settings = ctx.obj["winery_settings"]
 
     stop_on_next_iteration = False
 
@@ -285,13 +255,11 @@ def winery_rw_shard_cleaner(
         logger.warning("Received signal %s, exiting", signal.strsignal(signum))
         stop_on_next_iteration = True
 
-    base_dsn = config["base_dsn"]
-
     signal.signal(signal.SIGINT, set_signal_received)
     signal.signal(signal.SIGTERM, set_signal_received)
 
     ret = rw_shard_cleaner(
-        base_dsn=base_dsn,
+        database=settings["database"],
         min_mapped_hosts=min_mapped_hosts,
         stop_cleaning=stop_cleaning,
         wait_for_shard=wait_for_shard,
@@ -307,21 +275,10 @@ def winery_clean_deleted_objects(ctx):
     import signal
 
     from swh.objstorage.backends.winery.objstorage import deleted_objects_cleaner
-    from swh.objstorage.backends.winery.roshard import (
-        DEFAULT_IMAGE_FEATURES_UNSUPPORTED,
-        Pool,
-    )
+    from swh.objstorage.backends.winery.roshard import Pool
     from swh.objstorage.backends.winery.sharedbase import SharedBase
 
-    config = ctx.obj["config"]["objstorage"]
-    base_dsn = config["base_dsn"]
-    shard_max_size = config["shard_max_size"]
-    rbd_pool_name = config.get("rbd_pool_name", "shards")
-    rbd_data_pool_name = config.get("rbd_data_pool_name")
-    rbd_use_sudo = config.get("rbd_use_sudo", True)
-    rbd_image_features_unsupported = tuple(
-        config.get("rbd_image_features_unsupported", DEFAULT_IMAGE_FEATURES_UNSUPPORTED)
-    )
+    legacy_kwargs = ctx.obj["winery_legacy_kwargs"]
 
     stop_on_next_iteration = False
 
@@ -337,14 +294,14 @@ def winery_clean_deleted_objects(ctx):
     signal.signal(signal.SIGINT, set_signal_received)
     signal.signal(signal.SIGTERM, set_signal_received)
 
-    base = SharedBase(base_dsn=base_dsn)
+    base = SharedBase(base_dsn=legacy_kwargs["base_dsn"])
 
     pool = Pool(
-        shard_max_size=shard_max_size,
-        rbd_pool_name=rbd_pool_name,
-        rbd_data_pool_name=rbd_data_pool_name,
-        rbd_use_sudo=rbd_use_sudo,
-        rbd_image_features_unsupported=rbd_image_features_unsupported,
+        shard_max_size=legacy_kwargs["shard_max_size"],
+        rbd_pool_name=legacy_kwargs["rbd_pool_name"],
+        rbd_data_pool_name=legacy_kwargs["rbd_data_pool_name"],
+        rbd_use_sudo=legacy_kwargs["rbd_use_sudo"],
+        rbd_image_features_unsupported=legacy_kwargs["rbd_image_features_unsupported"],
     )
 
     deleted_objects_cleaner(base, pool, stop_running)
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index a238394..651a716 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -27,6 +27,7 @@ from swh.objstorage.backends.winery.objstorage import (
     shard_packer,
     stop_after_shards,
 )
+import swh.objstorage.backends.winery.settings as settings
 from swh.objstorage.backends.winery.sharedbase import ShardState, SharedBase
 from swh.objstorage.backends.winery.sleep import sleep_exponential
 from swh.objstorage.backends.winery.throttler import (
@@ -195,25 +196,39 @@ def clean_immediately(request) -> bool:
 
 
 @pytest.fixture
-def storage(
+def winery_settings(
+    postgresql_dsn,
     shard_max_size,
     pack_immediately,
     clean_immediately,
     rbd_pool_name,
     rbd_map_options,
+) -> settings.Winery:
+    return dict(
+        shards={"max_size": shard_max_size},
+        database={"db": postgresql_dsn},
+        throttler={
+            "max_write_bps": 200 * 1024 * 1024,
+            "max_read_bps": 100 * 1024 * 1024,
+        },
+        packer={
+            "pack_immediately": pack_immediately,
+            "clean_immediately": clean_immediately,
+        },
+        shards_pool={
+            "type": "rbd",
+            "pool_name": rbd_pool_name,
+            "map_options": rbd_map_options,
+        },
+    )
+
+
+@pytest.fixture
+def storage(
+    winery_settings,
     postgresql_dsn,
 ):
-    storage = get_objstorage(
-        cls="winery",
-        base_dsn=postgresql_dsn,
-        shard_max_size=shard_max_size,
-        throttle_write=200 * 1024 * 1024,
-        throttle_read=100 * 1024 * 1024,
-        pack_immediately=pack_immediately,
-        clean_immediately=clean_immediately,
-        rbd_pool_name=rbd_pool_name,
-        rbd_map_options=rbd_map_options,
-    )
+    storage = get_objstorage(cls="winery", **winery_settings)
     assert isinstance(storage, WineryObjStorage)
     logger.debug("Instantiated storage %s on rbd pool %s", storage, rbd_pool_name)
     yield storage
@@ -272,7 +287,7 @@ def test_winery_add_get(winery):
     winery.shard.drop()
 
 
-def test_winery_add_concurrent(winery, mocker):
+def test_winery_add_concurrent(winery, winery_settings, mocker):
     num_threads = 4
 
     class ManualReleaseSharedBase(SharedBase):
@@ -297,7 +312,9 @@ def test_winery_add_concurrent(winery, mocker):
 
         assert my_storage.get(obj_id) == content
 
-    storages = [get_objstorage(cls="winery", **winery.args) for _ in range(num_threads)]
+    storages = [
+        get_objstorage(cls="winery", **winery_settings) for _ in range(num_threads)
+    ]
 
     threads = [
         threading.Thread(target=add_object, args=[storage]) for storage in storages
@@ -550,7 +567,8 @@ def test_winery_sleep_exponential_negative():
 
 @pytest.mark.shard_max_size(1024)
 @pytest.mark.pack_immediately(False)
-def test_winery_standalone_packer(shard_max_size, image_pool, postgresql_dsn, storage):
+@pytest.mark.clean_immediately(False)
+def test_winery_standalone_packer(winery_settings, image_pool, storage):
     # create 4 shards
     for i in range(16):
         content = i.to_bytes(256, "little")
@@ -567,13 +585,8 @@ def test_winery_standalone_packer(shard_max_size, image_pool, postgresql_dsn, st
     # Pack a single shard
     assert (
         shard_packer(
-            base_dsn=postgresql_dsn,
-            shard_max_size=shard_max_size,
-            throttle_read=200 * 1024 * 1024,
-            throttle_write=200 * 1024 * 1024,
+            **winery_settings,
             stop_packing=stop_after_shards(1),
-            rbd_pool_name=image_pool.pool_name,
-            rbd_map_options=image_pool.map_options,
         )
         == 1
     )
@@ -587,7 +600,7 @@ def test_winery_standalone_packer(shard_max_size, image_pool, postgresql_dsn, st
     # Clean up the RW shard for the packed one
     assert (
         rw_shard_cleaner(
-            base_dsn=postgresql_dsn,
+            database=winery_settings["database"],
             min_mapped_hosts=0,
             stop_cleaning=stop_after_shards(1),
         )
@@ -603,13 +616,8 @@ def test_winery_standalone_packer(shard_max_size, image_pool, postgresql_dsn, st
     # Pack all remaining shards
     assert (
         shard_packer(
-            base_dsn=postgresql_dsn,
-            shard_max_size=shard_max_size,
-            throttle_read=200 * 1024 * 1024,
-            throttle_write=200 * 1024 * 1024,
+            **winery_settings,
             stop_packing=stop_after_shards(3),
-            rbd_pool_name=image_pool.pool_name,
-            rbd_map_options=image_pool.map_options,
         )
         == 3
     )
@@ -623,7 +631,7 @@ def test_winery_standalone_packer(shard_max_size, image_pool, postgresql_dsn, st
     # Clean up the RW shard for the packed one
     assert (
         rw_shard_cleaner(
-            base_dsn=postgresql_dsn,
+            database=winery_settings["database"],
             min_mapped_hosts=0,
             stop_cleaning=stop_after_shards(3),
         )
@@ -636,8 +644,9 @@ def test_winery_standalone_packer(shard_max_size, image_pool, postgresql_dsn, st
 
 @pytest.mark.shard_max_size(1024)
 @pytest.mark.pack_immediately(False)
+@pytest.mark.clean_immediately(False)
 def test_winery_packer_clean_up_interrupted_shard(
-    shard_max_size, image_pool, postgresql_dsn, storage, caplog
+    image_pool, winery_settings, storage, caplog
 ):
     caplog.set_level(logging.CRITICAL)
 
@@ -661,13 +670,12 @@ def test_winery_packer_clean_up_interrupted_shard(
     with caplog.at_level(logging.WARNING, "swh.objstorage.backends.winery.roshard"):
         # Pack a single shard
         ret = shard_packer(
-            base_dsn=postgresql_dsn,
-            shard_max_size=shard_max_size,
-            throttle_read=200 * 1024 * 1024,
-            throttle_write=200 * 1024 * 1024,
+            database=winery_settings["database"],
+            shards=winery_settings["shards"],
+            shards_pool={**winery_settings["shards_pool"], "create_images": False},
+            throttler=winery_settings["throttler"],
+            packer=winery_settings.get("packer"),
             stop_packing=stop_after_shards(1),
-            rbd_pool_name=image_pool.pool_name,
-            rbd_create_images=False,
         )
 
     assert ret == 1
@@ -689,7 +697,7 @@ def test_winery_packer_clean_up_interrupted_shard(
 @pytest.mark.shard_max_size(1024)
 @pytest.mark.pack_immediately(False)
 @pytest.mark.clean_immediately(False)
-def test_winery_cli_packer(image_pool, storage, tmp_path, cli_runner):
+def test_winery_cli_packer(image_pool, storage, tmp_path, winery_settings, cli_runner):
     # create 4 shards
     for i in range(16):
         content = i.to_bytes(256, "little")
@@ -704,9 +712,7 @@ def test_winery_cli_packer(image_pool, storage, tmp_path, cli_runner):
         assert shard_info[shard] == ShardState.FULL
 
     with open(tmp_path / "config.yml", "w") as f:
-        yaml.safe_dump(
-            {"objstorage": {"cls": "winery", **storage.winery.args}}, stream=f
-        )
+        yaml.safe_dump({"objstorage": {"cls": "winery", **winery_settings}}, stream=f)
 
     result = cli_runner.invoke(
         swh_cli_group,
@@ -724,7 +730,9 @@ def test_winery_cli_packer(image_pool, storage, tmp_path, cli_runner):
 @pytest.mark.shard_max_size(1024)
 @pytest.mark.pack_immediately(False)
 @pytest.mark.clean_immediately(False)
-def test_winery_cli_packer_rollback_on_error(image_pool, storage, tmp_path, cli_runner):
+def test_winery_cli_packer_rollback_on_error(
+    image_pool, storage, tmp_path, winery_settings, cli_runner
+):
     # create 4 shards
     for i in range(16):
         content = i.to_bytes(256, "little")
@@ -739,9 +747,7 @@ def test_winery_cli_packer_rollback_on_error(image_pool, storage, tmp_path, cli_
         assert shard_info[shard] == ShardState.FULL
 
     with open(tmp_path / "config.yml", "w") as f:
-        yaml.safe_dump(
-            {"objstorage": {"cls": "winery", **storage.winery.args}}, stream=f
-        )
+        yaml.safe_dump({"objstorage": {"cls": "winery", **winery_settings}}, stream=f)
 
     # pytest-mock doesn't seem to interact very well with the cli_runner
     def failing_pack(*args, **kwargs):
@@ -769,7 +775,7 @@ def test_winery_cli_packer_rollback_on_error(image_pool, storage, tmp_path, cli_
 
 @pytest.mark.shard_max_size(1024)
 @pytest.mark.pack_immediately(False)
-def test_winery_cli_rbd(image_pool, storage, tmp_path, cli_runner):
+def test_winery_cli_rbd(image_pool, storage, tmp_path, winery_settings, cli_runner):
     # create 4 shards
     for i in range(16):
         content = i.to_bytes(256, "little")
@@ -784,13 +790,34 @@ def test_winery_cli_rbd(image_pool, storage, tmp_path, cli_runner):
         assert shard_info[shard] == ShardState.FULL
 
     with open(tmp_path / "config.yml", "w") as f:
-        yaml.safe_dump(
-            {"objstorage": {"cls": "winery", **storage.winery.args}}, stream=f
-        )
+        yaml.safe_dump({"objstorage": {"cls": "winery", **winery_settings}}, stream=f)
 
     result = cli_runner.invoke(
         swh_cli_group,
-        ("objstorage", "winery", "rbd", "--stop-instead-of-waiting"),
+        (
+            "objstorage",
+            "winery",
+            "rbd",
+            "--stop-instead-of-waiting",
+        ),
+        env={"SWH_CONFIG_FILENAME": str(tmp_path / "config.yml")},
+    )
+
+    assert result.exit_code == 0
+
+    # The RBD shard mapper was run in "read-only" mode
+    for shard in filled:
+        assert image_pool.image_mapped(shard) is None
+
+    result = cli_runner.invoke(
+        swh_cli_group,
+        (
+            "objstorage",
+            "winery",
+            "rbd",
+            "--stop-instead-of-waiting",
+            "--manage-rw-images",
+        ),
         env={"SWH_CONFIG_FILENAME": str(tmp_path / "config.yml")},
     )
 
@@ -818,7 +845,7 @@ def test_winery_cli_rbd(image_pool, storage, tmp_path, cli_runner):
 @pytest.mark.pack_immediately(True)
 @pytest.mark.clean_immediately(False)
 def test_winery_cli_rw_shard_cleaner(
-    image_pool, postgresql_dsn, storage, tmp_path, cli_runner
+    image_pool, postgresql_dsn, storage, tmp_path, winery_settings, cli_runner
 ):
     # create 4 shards
     for i in range(16):
@@ -838,9 +865,7 @@ def test_winery_cli_rw_shard_cleaner(
         assert shard_info[shard] == ShardState.PACKED
 
     with open(tmp_path / "config.yml", "w") as f:
-        yaml.safe_dump(
-            {"objstorage": {"cls": "winery", **storage.winery.args}}, stream=f
-        )
+        yaml.safe_dump({"objstorage": {"cls": "winery", **winery_settings}}, stream=f)
 
     shard_tables = set(storage.winery.base.list_shard_tables())
     for shard in filled:
@@ -883,7 +908,7 @@ def test_winery_cli_rw_shard_cleaner(
 @pytest.mark.pack_immediately(True)
 @pytest.mark.clean_immediately(False)
 def test_winery_cli_rw_shard_cleaner_rollback_on_error(
-    image_pool, postgresql_dsn, storage, tmp_path, cli_runner
+    image_pool, postgresql_dsn, storage, tmp_path, winery_settings, cli_runner
 ):
     # create 4 shards
     for i in range(16):
@@ -903,9 +928,7 @@ def test_winery_cli_rw_shard_cleaner_rollback_on_error(
         assert shard_info[shard] == ShardState.PACKED
 
     with open(tmp_path / "config.yml", "w") as f:
-        yaml.safe_dump(
-            {"objstorage": {"cls": "winery", **storage.winery.args}}, stream=f
-        )
+        yaml.safe_dump({"objstorage": {"cls": "winery", **winery_settings}}, stream=f)
 
     shard_tables = set(storage.winery.base.list_shard_tables())
     for shard in filled:
@@ -944,8 +967,9 @@ def test_winery_cli_rw_shard_cleaner_rollback_on_error(
 
 @pytest.mark.shard_max_size(1024)
 @pytest.mark.pack_immediately(False)
+@pytest.mark.clean_immediately(False)
 def test_winery_standalone_packer_never_stop_packing(
-    image_pool, postgresql_dsn, shard_max_size, storage
+    image_pool, postgresql_dsn, shard_max_size, storage, winery_settings
 ):
     # create 4 shards
     for i in range(16):
@@ -972,13 +996,8 @@ def test_winery_standalone_packer_never_stop_packing(
 
     with pytest.raises(NoShardLeft):
         shard_packer(
-            base_dsn=postgresql_dsn,
-            shard_max_size=shard_max_size,
-            throttle_read=200 * 1024 * 1024,
-            throttle_write=200 * 1024 * 1024,
+            **winery_settings,
             wait_for_shard=wait_five_times,
-            rbd_pool_name=image_pool.pool_name,
-            rbd_map_options=image_pool.map_options,
         )
 
     assert called == list(range(5))
@@ -990,7 +1009,7 @@ def test_winery_standalone_packer_never_stop_packing(
 
     with pytest.raises(NoShardLeft):
         rw_shard_cleaner(
-            base_dsn=postgresql_dsn,
+            database=winery_settings["database"],
             min_mapped_hosts=0,
             wait_for_shard=wait_five_times,
         )
diff --git a/swh/objstorage/tests/winery_testing_helpers.py b/swh/objstorage/tests/winery_testing_helpers.py
index 00239b0..404e403 100644
--- a/swh/objstorage/tests/winery_testing_helpers.py
+++ b/swh/objstorage/tests/winery_testing_helpers.py
@@ -10,10 +10,8 @@ from pathlib import Path
 from subprocess import CalledProcessError
 from typing import Iterable, List, Optional, Tuple
 
-from swh.objstorage.backends.winery.roshard import (
-    DEFAULT_IMAGE_FEATURES_UNSUPPORTED,
-    Pool,
-)
+from swh.objstorage.backends.winery.roshard import Pool
+from swh.objstorage.backends.winery.settings import DEFAULT_IMAGE_FEATURES_UNSUPPORTED
 
 logger = logging.getLogger(__name__)
 
-- 
GitLab


From 4ee1e6328cac09f3aaa518405003f2f3c6677e6b Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Fri, 28 Feb 2025 13:48:55 +0100
Subject: [PATCH 07/25] Split WineryWriter and WineryReader instead of
 inheriting

This should help limit the blast radius of future shards pool management
API changes.
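
For illustration only, a minimal sketch of how the split composition is
meant to be used (hedged: the connection string, sizes and flags below are
placeholders, not a tested configuration):

    from swh.objstorage.factory import get_objstorage

    storage = get_objstorage(
        cls="winery",
        readonly=True,
        database={"db": "postgresql:///winery"},
        shards={"max_size": 100 * 1024 * 1024},
        shards_pool={"type": "rbd"},
        throttler={"max_read_bps": 100 * 1024 * 1024,
                   "max_write_bps": 100 * 1024 * 1024},
    )
    # Reads go through storage.reader; in readonly mode storage.writer is
    # None, so storage.add() and storage.delete() raise
    # ReadOnlyObjStorageError instead of touching the write shards.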
---
 swh/objstorage/backends/winery/objstorage.py  |  58 ++--
 .../tests/test_objstorage_winery.py           | 252 +++++++++---------
 2 files changed, 159 insertions(+), 151 deletions(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 6a3b505..3b66531 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -47,17 +47,17 @@ class WineryObjStorage(ObjStorage):
             packer=(packer or {}),
         )
 
-        self.winery: WineryReader | WineryWriter
+        self.reader = WineryReader(**legacy_kwargs)
 
         if readonly:
-            self.winery = WineryReader(**legacy_kwargs)
+            self.writer = None
         else:
-            self.winery = WineryWriter(**legacy_kwargs)
+            self.writer = WineryWriter(**legacy_kwargs)
 
     @timed
     def get(self, obj_id: ObjId) -> bytes:
         try:
-            return self.winery.get(self._hash(obj_id))
+            return self.reader.get(self._hash(obj_id))
         except ObjNotFoundError as exc:
             # re-raise exception with the passed obj_id instead of the internal winery obj_id.
             raise ObjNotFoundError(obj_id) from exc
@@ -67,20 +67,27 @@ class WineryObjStorage(ObjStorage):
 
     @timed
     def __contains__(self, obj_id: ObjId) -> bool:
-        return self._hash(obj_id) in self.winery
+        return self._hash(obj_id) in self.reader
 
     @timed
     def add(self, content: bytes, obj_id: ObjId, check_presence: bool = True) -> None:
-        if not isinstance(self.winery, WineryWriter):
+        if not self.writer:
             raise ReadOnlyObjStorageError("add")
-        self.winery.add(content, self._hash(obj_id), check_presence)
+        internal_obj_id = self._hash(obj_id)
+        if check_presence and internal_obj_id in self.reader:
+            return
+        self.writer.add(content, internal_obj_id)
 
     def delete(self, obj_id: ObjId):
-        if not isinstance(self.winery, WineryWriter):
+        if not self.writer:
             raise ReadOnlyObjStorageError("delete")
         if not self.allow_delete:
             raise PermissionError("Delete is not allowed.")
-        return self.winery.delete(self._hash(obj_id))
+        try:
+            return self.writer.delete(self._hash(obj_id))
+        # Re-raise ObjNotFoundError with the full object id
+        except ObjNotFoundError as exc:
+            raise ObjNotFoundError(obj_id) from exc
 
     def _hash(self, obj_id: ObjId) -> bytes:
         return obj_id[self.PRIMARY_HASH]
@@ -88,7 +95,7 @@ class WineryObjStorage(ObjStorage):
     def __iter__(self) -> Iterator[CompositeObjId]:
         if self.PRIMARY_HASH != "sha256":
             raise ValueError(f"Unknown primary hash {self.PRIMARY_HASH}")
-        for signature in self.winery.list_signatures():
+        for signature in self.reader.list_signatures():
             yield {"sha256": signature}
 
     def list_content(
@@ -103,17 +110,20 @@ class WineryObjStorage(ObjStorage):
         if last_obj_id:
             after_id = self._hash(last_obj_id)
 
-        for signature in self.winery.list_signatures(after_id=after_id, limit=limit):
+        for signature in self.reader.list_signatures(after_id=after_id, limit=limit):
             yield {"sha256": signature}
 
     def on_shutdown(self):
-        self.winery.on_shutdown()
+        if self.writer:
+            self.writer.on_shutdown()
 
 
-class WineryBase:
+class WineryReader:
     def __init__(self, **kwargs):
         self.args = kwargs
         self.base = SharedBase(**self.args)
+        self.ro_shards = {}
+        self.rw_shards = {}
 
     def __contains__(self, obj_id):
         return self.base.contains(obj_id)
@@ -123,16 +133,6 @@ class WineryBase:
     ) -> Iterator[bytes]:
         yield from self.base.list_signatures(after_id, limit)
 
-    def on_shutdown(self):
-        return
-
-
-class WineryReader(WineryBase):
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        self.ro_shards = {}
-        self.rw_shards = {}
-
     def roshard(self, name) -> Optional[ROShard]:
         if name not in self.ro_shards:
             try:
@@ -204,7 +204,7 @@ def cleanup_rw_shard(shard, shared_base=None, **kwargs) -> bool:
     return True
 
 
-class WineryWriter(WineryReader):
+class WineryWriter:
     def __init__(
         self,
         pack_immediately: bool = True,
@@ -214,7 +214,8 @@ class WineryWriter(WineryReader):
     ):
         self.pack_immediately = pack_immediately
         self.clean_immediately = clean_immediately
-        super().__init__(**kwargs)
+        self.args = kwargs
+        self.base = SharedBase(**self.args)
         self.shards_filled: List[str] = []
         self.packers: List[Process] = []
         self._shard: Optional[RWShard] = None
@@ -258,10 +259,7 @@ class WineryWriter(WineryReader):
             )
         return self._shard
 
-    def add(self, content: bytes, obj_id: bytes, check_presence: bool = True) -> None:
-        if check_presence and obj_id in self:
-            return
-
+    def add(self, content: bytes, obj_id: bytes) -> None:
         with self.base.pool.connection() as db, db.transaction():
             shard = self.base.record_new_obj_id(db, obj_id)
             if shard != self.base.locked_shard_id:
@@ -285,7 +283,7 @@ class WineryWriter(WineryReader):
         # We only care about RWShard for now. ROShards will be
         # taken care in a batch job.
         if not state.image_available:
-            rwshard = self.rwshard(name)
+            rwshard = RWShard(name, **self.args)
             try:
                 rwshard.delete(obj_id)
             except KeyError:
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index 651a716..5f613aa 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -242,13 +242,18 @@ def storage(
 
 
 @pytest.fixture
-def winery(storage):
-    return storage.winery
+def winery_reader(storage):
+    return storage.reader
 
 
-def test_winery_sharedbase(winery):
-    base = winery.base
-    shard1 = winery.shard.name
+@pytest.fixture
+def winery_writer(storage):
+    return storage.writer
+
+
+def test_winery_sharedbase(winery_writer):
+    base = winery_writer.base
+    shard1 = winery_writer.shard.name
     assert shard1 is not None
     assert shard1 == base.locked_shard
 
@@ -258,36 +263,35 @@ def test_winery_sharedbase(winery):
 
     assert base.get_shard_state(shard1) == ShardState.WRITING
 
-    winery.release_shard()
+    winery_writer.release_shard()
 
-    assert winery.base._locked_shard is None
+    assert winery_writer.base._locked_shard is None
     assert base.get_shard_state(shard1) == ShardState.STANDBY
 
-    shard2 = winery.base.locked_shard
+    shard2 = winery_writer.base.locked_shard
 
     assert shard1 == shard2, "Locked a different shard?"
     assert base.get_shard_state(shard1) == ShardState.WRITING
 
 
-def test_winery_add_get(winery):
-    shard = winery.base.locked_shard
+def test_winery_add_get(winery_writer, winery_reader):
+    shard = winery_writer.base.locked_shard
     content = b"SOMETHING"
     sha256 = objid_for_content(content)["sha256"]
     assert (
         sha256.hex()
         == "866878b165607851782d8d233edf0c261172ff67926330d3bbd10c705b92d24f"
     )
-    winery.add(content=content, obj_id=sha256)
-    winery.add(content=content, obj_id=sha256)
-    winery.add(content=content, obj_id=sha256, check_presence=False)
-    assert winery.base.locked_shard == shard
-    assert winery.get(sha256) == content
+    winery_writer.add(content=content, obj_id=sha256)
+    winery_writer.add(content=content, obj_id=sha256)
+    assert winery_writer.base.locked_shard == shard
+    assert winery_reader.get(sha256) == content
     with pytest.raises(ObjNotFoundError):
-        winery.get(b"unknown")
-    winery.shard.drop()
+        winery_reader.get(b"unknown")
+    winery_writer.shard.drop()
 
 
-def test_winery_add_concurrent(winery, winery_settings, mocker):
+def test_winery_add_concurrent(winery_settings, mocker):
     num_threads = 4
 
     class ManualReleaseSharedBase(SharedBase):
@@ -323,13 +327,13 @@ def test_winery_add_concurrent(winery, winery_settings, mocker):
         thread.start()
 
     for storage in reversed(storages):
-        storage.winery.base.release_obj_id.set()
+        storage.writer.base.release_obj_id.set()
 
     for thread in threads:
         thread.join()
 
-    assert winery.get(obj_id["sha256"]) == content
-    assert sum(1 for _ in winery.base.list_shards()) >= num_threads
+    assert storage.reader.get(obj_id["sha256"]) == content
+    assert sum(1 for _ in storage.reader.base.list_shards()) >= num_threads
 
     for storage in storages:
         assert isinstance(storage, WineryObjStorage)
@@ -337,49 +341,49 @@ def test_winery_add_concurrent(winery, winery_settings, mocker):
 
 
 @pytest.mark.shard_max_size(1)
-def test_winery_add_and_pack(winery, mocker):
+def test_winery_add_and_pack(winery_writer, mocker):
     mocker.patch("swh.objstorage.backends.winery.objstorage.pack", return_value=True)
-    shard = winery.base.locked_shard
+    shard = winery_writer.base.locked_shard
     content = b"SOMETHING"
     sha256 = objid_for_content(content)["sha256"]
-    winery.add(content=content, obj_id=sha256)
-    assert winery.base.locked_shard != shard
-    assert len(winery.packers) == 1
-    packer = winery.packers[0]
+    winery_writer.add(content=content, obj_id=sha256)
+    assert winery_writer.base.locked_shard != shard
+    assert len(winery_writer.packers) == 1
+    packer = winery_writer.packers[0]
     packer.join()
     assert packer.exitcode == 0
 
 
-def test_winery_delete_on_rwshard(winery):
-    shard = winery.base.locked_shard
+def test_winery_delete_on_rwshard(winery_writer, winery_reader):
+    shard = winery_writer.base.locked_shard
     content = b"SOMETHING"
     sha256 = objid_for_content(content)["sha256"]
-    winery.add(content=content, obj_id=sha256)
-    assert winery.base.locked_shard == shard
-    assert winery.get(sha256) == content
-    winery.delete(sha256)
+    winery_writer.add(content=content, obj_id=sha256)
+    assert winery_writer.base.locked_shard == shard
+    assert winery_reader.get(sha256) == content
+    winery_writer.delete(sha256)
     with pytest.raises(ObjNotFoundError):
-        winery.get(sha256)
+        winery_reader.get(sha256)
 
 
 @pytest.mark.shard_max_size(1)
 @pytest.mark.pack_immediately(True)
-def test_winery_delete_on_roshard(winery, file_backed_pool):
-    shard = winery.base.locked_shard
+def test_winery_delete_on_roshard(winery_writer, winery_reader, file_backed_pool):
+    shard = winery_writer.base.locked_shard
     content = b"SOMETHING"
     sha256 = objid_for_content(content)["sha256"]
-    winery.add(content=content, obj_id=sha256)
-    assert winery.base.locked_shard != shard
-    assert winery.packers
-    for packer in winery.packers:
+    winery_writer.add(content=content, obj_id=sha256)
+    assert winery_writer.base.locked_shard != shard
+    assert winery_writer.packers
+    for packer in winery_writer.packers:
         packer.join()
-    assert winery.get(sha256) == content
+    assert winery_reader.get(sha256) == content
     # This will only mark as deleted in SharedBase
-    winery.delete(sha256)
-    assert len(list(winery.base.deleted_objects())) == 1
+    winery_writer.delete(sha256)
+    assert len(list(winery_writer.base.deleted_objects())) == 1
     # We still should not be able to access it
     with pytest.raises(ObjNotFoundError):
-        winery.get(sha256)
+        winery_reader.get(sha256)
     # The content is still present in the roshard image at this point
     image_path = file_backed_pool.image_path(shard)
     with open(image_path, "rb") as image:
@@ -387,8 +391,10 @@ def test_winery_delete_on_roshard(winery, file_backed_pool):
     # Perform cleanup
     file_backed_pool.image_unmap(shard)
     file_backed_pool.image_map(shard, "rw")
-    deleted_objects_cleaner(winery.base, file_backed_pool, stop_running=lambda: False)
-    assert len(list(winery.base.deleted_objects())) == 0
+    deleted_objects_cleaner(
+        winery_reader.base, file_backed_pool, stop_running=lambda: False
+    )
+    assert len(list(winery_reader.base.deleted_objects())) == 0
     with open(image_path, "rb") as image:
         assert b"SOMETHING" not in image.read()
 
@@ -396,32 +402,32 @@ def test_winery_delete_on_roshard(winery, file_backed_pool):
 @pytest.mark.shard_max_size(20)
 @pytest.mark.pack_immediately(True)
 def test_winery_deleted_objects_cleaner_handles_exception(
-    winery, file_backed_pool, mocker
+    winery_writer, file_backed_pool, mocker
 ):
     from swh.objstorage.backends.winery import objstorage as winery_objstorage
 
     from ..backends.winery.roshard import ROShard
 
     # Add two objects
-    shard = winery.base.locked_shard
+    shard = winery_writer.base.locked_shard
     content1 = b"PINOT GRIS"
     sha256_1 = objid_for_content(content1)["sha256"]
-    winery.add(content=content1, obj_id=sha256_1)
+    winery_writer.add(content=content1, obj_id=sha256_1)
     content2 = b"CHARDONNAY"
     sha256_2 = objid_for_content(content2)["sha256"]
-    winery.add(content=content2, obj_id=sha256_2)
+    winery_writer.add(content=content2, obj_id=sha256_2)
 
     # This should be enough bytes to trigger packing
-    for packer in winery.packers:
+    for packer in winery_writer.packers:
         packer.join()
 
     # We should only have one roshard
     assert len(file_backed_pool.image_list()) == 1
 
     # This will only mark as deleted in SharedBase for the time being
-    winery.delete(sha256_1)
-    winery.delete(sha256_2)
-    assert len(list(winery.base.deleted_objects())) == 2
+    winery_writer.delete(sha256_1)
+    winery_writer.delete(sha256_2)
+    assert len(list(winery_writer.base.deleted_objects())) == 2
 
     # The content is still present in the roshard image at this point
     image_path = file_backed_pool.image_path(shard)
@@ -448,11 +454,11 @@ def test_winery_deleted_objects_cleaner_handles_exception(
     file_backed_pool.image_map(shard, "rw")
     with pytest.raises(OSError):
         winery_objstorage.deleted_objects_cleaner(
-            winery.base, file_backed_pool, stop_running=lambda: False
+            winery_writer.base, file_backed_pool, stop_running=lambda: False
         )
 
     # We should only have one remaining object to delete
-    assert len(list(winery.base.deleted_objects())) == 1
+    assert len(list(winery_writer.base.deleted_objects())) == 1
 
     # We should have only the content of one of the objects still in the roshard
     with open(image_path, "rb") as image:
@@ -461,72 +467,76 @@ def test_winery_deleted_objects_cleaner_handles_exception(
         assert sorted(presences) == [False, True]
 
 
-def test_winery_get_shard_info(winery):
-    assert winery.base.get_shard_info(1234) is None
-    assert winery.base.get_shard_state("nothing") is None
+def test_winery_get_shard_info(winery_reader):
+    assert winery_reader.base.get_shard_info(1234) is None
+    assert winery_reader.base.get_shard_state("nothing") is None
 
 
-def test_winery_base_record_shard_mapped(winery):
+def test_winery_base_record_shard_mapped(winery_writer):
     # Lock a shard
-    shard_name, shard_id = winery.base.create_shard(new_state=ShardState.PACKED)
+    shard_name, shard_id = winery_writer.base.create_shard(new_state=ShardState.PACKED)
 
-    assert {"test"} == winery.base.record_shard_mapped(host="test", name=shard_name)
-    assert {"test"} == winery.base.record_shard_mapped(host="test", name=shard_name)
-    assert {"test", "test2"} == winery.base.record_shard_mapped(
+    assert {"test"} == winery_writer.base.record_shard_mapped(
+        host="test", name=shard_name
+    )
+    assert {"test"} == winery_writer.base.record_shard_mapped(
+        host="test", name=shard_name
+    )
+    assert {"test", "test2"} == winery_writer.base.record_shard_mapped(
         host="test2", name=shard_name
     )
 
 
 @pytest.mark.shard_max_size(10 * 1024 * 1024)
 @pytest.mark.clean_immediately(False)
-def test_winery_pack(winery, image_pool):
-    shard = winery.base.locked_shard
+def test_winery_pack(winery_writer, image_pool):
+    shard = winery_writer.base.locked_shard
     content = b"SOMETHING"
     sha256 = objid_for_content(content)["sha256"]
-    winery.add(content=content, obj_id=sha256)
-    winery.base.set_shard_state(ShardState.FULL)
-    winery.base.shard_packing_starts(shard)
+    winery_writer.add(content=content, obj_id=sha256)
+    winery_writer.base.set_shard_state(ShardState.FULL)
+    winery_writer.base.shard_packing_starts(shard)
 
-    assert pack(shard, **winery.args)
-    assert winery.base.get_shard_state(shard) == ShardState.PACKED
+    assert pack(shard, **winery_writer.args)
+    assert winery_writer.base.get_shard_state(shard) == ShardState.PACKED
 
-    assert cleanup_rw_shard(shard, **winery.args)
-    assert winery.base.get_shard_state(shard) == ShardState.READONLY
+    assert cleanup_rw_shard(shard, **winery_writer.args)
+    assert winery_writer.base.get_shard_state(shard) == ShardState.READONLY
 
 
 @pytest.mark.shard_max_size(1024 * 1024)
 @pytest.mark.pack_immediately(True)
 def test_winery_writer_pack_immediately_true(image_pool, storage):
-    shard = storage.winery.base.locked_shard
+    shard = storage.writer.base.locked_shard
 
     for i in range(1024):
         content = i.to_bytes(1024, "little")
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    assert storage.winery.packers
-    for packer in storage.winery.packers:
+    assert storage.writer.packers
+    for packer in storage.writer.packers:
         packer.join()
 
-    assert storage.winery.base.locked_shard != shard
+    assert storage.writer.base.locked_shard != shard
 
-    assert storage.winery.base.get_shard_state(shard) == ShardState.READONLY
+    assert storage.writer.base.get_shard_state(shard) == ShardState.READONLY
 
 
 @pytest.mark.shard_max_size(1024 * 1024)
 @pytest.mark.pack_immediately(False)
 def test_winery_writer_pack_immediately_false(storage):
-    shard = storage.winery.base.locked_shard
+    shard = storage.writer.base.locked_shard
 
     for i in range(1024):
         content = i.to_bytes(1024, "little")
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    assert storage.winery.base.locked_shard != shard
-    assert not storage.winery.packers
+    assert storage.writer.base.locked_shard != shard
+    assert not storage.writer.packers
 
-    assert storage.winery.base.get_shard_state(shard) == ShardState.FULL
+    assert storage.writer.base.get_shard_state(shard) == ShardState.FULL
 
 
 @pytest.mark.parametrize(
@@ -575,10 +585,10 @@ def test_winery_standalone_packer(winery_settings, image_pool, storage):
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    filled = storage.winery.shards_filled
+    filled = storage.writer.shards_filled
     assert len(filled) == 4
 
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert shard_info[shard] == ShardState.FULL
 
@@ -591,7 +601,7 @@ def test_winery_standalone_packer(winery_settings, image_pool, storage):
         == 1
     )
 
-    shard_counts = Counter(state for _, state in storage.winery.base.list_shards())
+    shard_counts = Counter(state for _, state in storage.writer.base.list_shards())
     assert shard_counts == {
         ShardState.FULL: 3,
         ShardState.PACKED: 1,
@@ -607,7 +617,7 @@ def test_winery_standalone_packer(winery_settings, image_pool, storage):
         == 1
     )
 
-    shard_counts = Counter(state for _, state in storage.winery.base.list_shards())
+    shard_counts = Counter(state for _, state in storage.writer.base.list_shards())
     assert shard_counts == {
         ShardState.FULL: 3,
         ShardState.READONLY: 1,
@@ -622,7 +632,7 @@ def test_winery_standalone_packer(winery_settings, image_pool, storage):
         == 3
     )
 
-    shard_counts = Counter(state for _, state in storage.winery.base.list_shards())
+    shard_counts = Counter(state for _, state in storage.writer.base.list_shards())
     assert shard_counts == {
         ShardState.PACKED: 3,
         ShardState.READONLY: 1,
@@ -638,7 +648,7 @@ def test_winery_standalone_packer(winery_settings, image_pool, storage):
         == 3
     )
 
-    shard_counts = Counter(state for _, state in storage.winery.base.list_shards())
+    shard_counts = Counter(state for _, state in storage.writer.base.list_shards())
     assert shard_counts == {ShardState.READONLY: 4}
 
 
@@ -656,7 +666,7 @@ def test_winery_packer_clean_up_interrupted_shard(
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    filled = storage.winery.shards_filled
+    filled = storage.writer.shards_filled
     assert len(filled) == 1
 
     shard = filled[0]
@@ -704,10 +714,10 @@ def test_winery_cli_packer(image_pool, storage, tmp_path, winery_settings, cli_r
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    filled = storage.winery.shards_filled
+    filled = storage.writer.shards_filled
     assert len(filled) == 4
 
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert shard_info[shard] == ShardState.FULL
 
@@ -722,7 +732,7 @@ def test_winery_cli_packer(image_pool, storage, tmp_path, winery_settings, cli_r
 
     assert result.exit_code == 0
 
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert shard_info[shard] == ShardState.PACKED
 
@@ -739,10 +749,10 @@ def test_winery_cli_packer_rollback_on_error(
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    filled = storage.winery.shards_filled
+    filled = storage.writer.shards_filled
     assert len(filled) == 4
 
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert shard_info[shard] == ShardState.FULL
 
@@ -766,7 +776,7 @@ def test_winery_cli_packer_rollback_on_error(
 
     assert result.exit_code == 1
 
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert (
             shard_info[shard] == ShardState.FULL
@@ -782,10 +792,10 @@ def test_winery_cli_rbd(image_pool, storage, tmp_path, winery_settings, cli_runn
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    filled = storage.winery.shards_filled
+    filled = storage.writer.shards_filled
     assert len(filled) == 4
 
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert shard_info[shard] == ShardState.FULL
 
@@ -827,7 +837,7 @@ def test_winery_cli_rbd(image_pool, storage, tmp_path, winery_settings, cli_runn
         assert image_pool.image_mapped(shard) == "rw"
 
     for shard in filled:
-        storage.winery.base.set_shard_state(name=shard, new_state=ShardState.PACKED)
+        storage.writer.base.set_shard_state(name=shard, new_state=ShardState.PACKED)
 
     result = cli_runner.invoke(
         swh_cli_group,
@@ -853,21 +863,21 @@ def test_winery_cli_rw_shard_cleaner(
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    filled = storage.winery.shards_filled
+    filled = storage.writer.shards_filled
     assert len(filled) == 4
 
-    for packer in storage.winery.packers:
+    for packer in storage.writer.packers:
         packer.join()
         assert packer.exitcode == 0
 
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert shard_info[shard] == ShardState.PACKED
 
     with open(tmp_path / "config.yml", "w") as f:
         yaml.safe_dump({"objstorage": {"cls": "winery", **winery_settings}}, stream=f)
 
-    shard_tables = set(storage.winery.base.list_shard_tables())
+    shard_tables = set(storage.writer.base.list_shard_tables())
     for shard in filled:
         assert shard in shard_tables
 
@@ -880,7 +890,7 @@ def test_winery_cli_rw_shard_cleaner(
     assert result.exit_code == 0
 
     # No hosts have mapped the shard as remapped, so the cleaner has done nothing
-    shard_tables = set(storage.winery.base.list_shard_tables())
+    shard_tables = set(storage.writer.base.list_shard_tables())
     for shard in filled:
         assert shard in shard_tables
 
@@ -899,7 +909,7 @@ def test_winery_cli_rw_shard_cleaner(
     assert result.exit_code == 0
 
     # Now we've forced action
-    shard_tables = set(storage.winery.base.list_shard_tables())
+    shard_tables = set(storage.writer.base.list_shard_tables())
     for shard in filled:
         assert shard not in shard_tables
 
@@ -916,21 +926,21 @@ def test_winery_cli_rw_shard_cleaner_rollback_on_error(
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    filled = storage.winery.shards_filled
+    filled = storage.writer.shards_filled
     assert len(filled) == 4
 
-    for packer in storage.winery.packers:
+    for packer in storage.writer.packers:
         packer.join()
         assert packer.exitcode == 0
 
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert shard_info[shard] == ShardState.PACKED
 
     with open(tmp_path / "config.yml", "w") as f:
         yaml.safe_dump({"objstorage": {"cls": "winery", **winery_settings}}, stream=f)
 
-    shard_tables = set(storage.winery.base.list_shard_tables())
+    shard_tables = set(storage.writer.base.list_shard_tables())
     for shard in filled:
         assert shard in shard_tables
 
@@ -958,8 +968,8 @@ def test_winery_cli_rw_shard_cleaner_rollback_on_error(
 
     assert result.exit_code == 1
 
-    shard_tables = set(storage.winery.base.list_shard_tables())
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_tables = set(storage.writer.base.list_shard_tables())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert shard in shard_tables
         assert shard_info[shard] == ShardState.PACKED
@@ -977,10 +987,10 @@ def test_winery_standalone_packer_never_stop_packing(
         obj_id = objid_for_content(content)
         storage.add(content=content, obj_id=obj_id)
 
-    filled = storage.winery.shards_filled
+    filled = storage.writer.shards_filled
     assert len(filled) == 4
 
-    shard_info = dict(storage.winery.base.list_shards())
+    shard_info = dict(storage.writer.base.list_shards())
     for shard in filled:
         assert shard_info[shard] == ShardState.FULL
 
@@ -1002,7 +1012,7 @@ def test_winery_standalone_packer_never_stop_packing(
 
     assert called == list(range(5))
 
-    shard_counts = Counter(state for _, state in storage.winery.base.list_shards())
+    shard_counts = Counter(state for _, state in storage.writer.base.list_shards())
     assert shard_counts == {ShardState.PACKED: 4}
 
     called = []
@@ -1016,20 +1026,20 @@ def test_winery_standalone_packer_never_stop_packing(
 
     assert called == list(range(5))
 
-    shard_counts = Counter(state for _, state in storage.winery.base.list_shards())
+    shard_counts = Counter(state for _, state in storage.writer.base.list_shards())
     assert shard_counts == {ShardState.READONLY: 4}
 
 
 @pytest.mark.shard_max_size(10 * 1024 * 1024)
-def test_winery_get_object(winery, image_pool):
-    shard = winery.base.locked_shard
+def test_winery_get_object(winery_writer, winery_reader, image_pool):
+    shard = winery_writer.base.locked_shard
     content = b"SOMETHING"
     sha256 = objid_for_content(content)["sha256"]
-    winery.add(content=content, obj_id=sha256)
-    winery.base.set_shard_state(ShardState.FULL)
-    winery.base.shard_packing_starts(shard)
-    assert pack(shard, **winery.args) is True
-    assert winery.get(sha256) == content
+    winery_writer.add(content=content, obj_id=sha256)
+    winery_writer.base.set_shard_state(ShardState.FULL)
+    winery_writer.base.shard_packing_starts(shard)
+    assert pack(shard, **winery_writer.args) is True
+    assert winery_reader.get(sha256) == content
 
 
 @pytest.mark.skipif("CEPH_HARDCODE_POOL" in os.environ, reason="Ceph pool hardcoded")
-- 
GitLab


From c967ea4746e7873001cc9268e1e8f39bb3682aa1 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Fri, 28 Feb 2025 17:32:30 +0100
Subject: [PATCH 08/25] winery: use new-style settings for pack function

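For context, the packing behaviour now travels as a single dict built with
settings.packer_settings_with_defaults (a hedged sketch; the empty dict
below stands for whatever the caller configured):

    from swh.objstorage.backends.winery import settings

    packer: settings.Packer = settings.packer_settings_with_defaults({})
    assert packer == {"pack_immediately": True, "clean_immediately": True}

    # pack() and WineryWriter now receive this dict as packer_settings=
    # instead of the separate pack_immediately/clean_immediately keyword
    # arguments.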
---
 swh/objstorage/backends/winery/objstorage.py  | 25 +++++++++++--------
 swh/objstorage/backends/winery/settings.py    |  1 -
 .../tests/test_objstorage_winery.py           | 11 +++++---
 3 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 3b66531..b0aa245 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -52,7 +52,9 @@ class WineryObjStorage(ObjStorage):
         if readonly:
             self.writer = None
         else:
-            self.writer = WineryWriter(**legacy_kwargs)
+            self.writer = WineryWriter(
+                packer_settings=self.settings["packer"], **legacy_kwargs
+            )
 
     @timed
     def get(self, obj_id: ObjId) -> bytes:
@@ -168,7 +170,7 @@ class WineryReader:
         return content
 
 
-def pack(shard, shared_base=None, clean_immediately=False, **kwargs) -> bool:
+def pack(shard, packer_settings: settings.Packer, shared_base=None, **kwargs) -> bool:
     rw = RWShard(shard, **kwargs)
 
     count = rw.count()
@@ -187,7 +189,7 @@ def pack(shard, shared_base=None, clean_immediately=False, **kwargs) -> bool:
     if not shared_base:
         shared_base = SharedBase(**kwargs)
     shared_base.shard_packing_ends(shard)
-    if clean_immediately:
+    if packer_settings["clean_immediately"]:
         cleanup_rw_shard(shard, shared_base=shared_base, **kwargs)
     return True
 
@@ -207,13 +209,11 @@ def cleanup_rw_shard(shard, shared_base=None, **kwargs) -> bool:
 class WineryWriter:
     def __init__(
         self,
-        pack_immediately: bool = True,
-        clean_immediately: bool = True,
+        packer_settings: settings.Packer,
         rwshard_idle_timeout: float = 300,
         **kwargs,
     ):
-        self.pack_immediately = pack_immediately
-        self.clean_immediately = clean_immediately
+        self.packer_settings = packer_settings
         self.args = kwargs
         self.base = SharedBase(**self.args)
         self.shards_filled: List[str] = []
@@ -272,7 +272,7 @@ class WineryWriter:
             filled_name = self.shard.name
             self.release_shard(new_state=ShardState.FULL)
             self.shards_filled.append(filled_name)
-            if self.pack_immediately:
+            if self.packer_settings["pack_immediately"]:
                 self.pack(filled_name)
 
     def delete(self, obj_id: bytes):
@@ -308,7 +308,7 @@ class WineryWriter:
             target=pack,
             kwargs={
                 "shard": shard_name,
-                "clean_immediately": self.clean_immediately,
+                "packer_settings": self.packer_settings,
                 **self.args,
             },
         )
@@ -399,7 +399,12 @@ def shard_packer(
 
         with locked:
             logger.info("shard_packer: Locked shard %s to pack", locked.name)
-            ret = pack(locked.name, shared_base=base, **legacy_kwargs)
+            ret = pack(
+                locked.name,
+                packer_settings=all_settings["packer"],
+                shared_base=base,
+                **legacy_kwargs,
+            )
             if not ret:
                 raise ValueError("Packing shard %s failed" % locked.name)
             shards_packed += 1
diff --git a/swh/objstorage/backends/winery/settings.py b/swh/objstorage/backends/winery/settings.py
index 2004475..1e329dc 100644
--- a/swh/objstorage/backends/winery/settings.py
+++ b/swh/objstorage/backends/winery/settings.py
@@ -145,6 +145,5 @@ def populate_default_settings(
     if packer is not None:
         packer = packer_settings_with_defaults(packer)
         settings["packer"] = packer
-        legacy_kwargs.update(packer)
 
     return settings, legacy_kwargs
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index 5f613aa..fd8573c 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -489,7 +489,7 @@ def test_winery_base_record_shard_mapped(winery_writer):
 
 @pytest.mark.shard_max_size(10 * 1024 * 1024)
 @pytest.mark.clean_immediately(False)
-def test_winery_pack(winery_writer, image_pool):
+def test_winery_pack(winery_settings, winery_writer, image_pool):
     shard = winery_writer.base.locked_shard
     content = b"SOMETHING"
     sha256 = objid_for_content(content)["sha256"]
@@ -497,7 +497,7 @@ def test_winery_pack(winery_writer, image_pool):
     winery_writer.base.set_shard_state(ShardState.FULL)
     winery_writer.base.shard_packing_starts(shard)
 
-    assert pack(shard, **winery_writer.args)
+    assert pack(shard, packer_settings=winery_settings["packer"], **winery_writer.args)
     assert winery_writer.base.get_shard_state(shard) == ShardState.PACKED
 
     assert cleanup_rw_shard(shard, **winery_writer.args)
@@ -1031,14 +1031,17 @@ def test_winery_standalone_packer_never_stop_packing(
 
 
 @pytest.mark.shard_max_size(10 * 1024 * 1024)
-def test_winery_get_object(winery_writer, winery_reader, image_pool):
+def test_winery_get_object(winery_settings, winery_writer, winery_reader, image_pool):
     shard = winery_writer.base.locked_shard
     content = b"SOMETHING"
     sha256 = objid_for_content(content)["sha256"]
     winery_writer.add(content=content, obj_id=sha256)
     winery_writer.base.set_shard_state(ShardState.FULL)
     winery_writer.base.shard_packing_starts(shard)
-    assert pack(shard, **winery_writer.args) is True
+    assert (
+        pack(shard, packer_settings=winery_settings["packer"], **winery_writer.args)
+        is True
+    )
     assert winery_reader.get(sha256) == content
 
 
-- 
GitLab


From fd3e092a960befd96384e0d8bbc9be3982da9a60 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Fri, 28 Feb 2025 18:23:06 +0100
Subject: [PATCH 09/25] Add typing for WineryReader ro_shards and rw_shards

---
 swh/objstorage/backends/winery/objstorage.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index b0aa245..dac41cc 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -6,7 +6,7 @@
 from functools import partial
 import logging
 from multiprocessing import Process
-from typing import Callable, Iterator, List, Optional
+from typing import Callable, Dict, Iterator, List, Optional
 
 from swh.objstorage.constants import DEFAULT_LIMIT
 from swh.objstorage.exc import ObjNotFoundError, ReadOnlyObjStorageError
@@ -124,8 +124,8 @@ class WineryReader:
     def __init__(self, **kwargs):
         self.args = kwargs
         self.base = SharedBase(**self.args)
-        self.ro_shards = {}
-        self.rw_shards = {}
+        self.ro_shards: Dict[str, ROShard] = {}
+        self.rw_shards: Dict[str, RWShard] = {}
 
     def __contains__(self, obj_id):
         return self.base.contains(obj_id)
-- 
GitLab


From 442effbee66ca40f48ce7a330b976b2d24cafc17 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Fri, 28 Feb 2025 18:23:29 +0100
Subject: [PATCH 10/25] winery: use new-style settings for Throttler

---
 swh/objstorage/backends/winery/objstorage.py  | 31 +++++++++++++++----
 swh/objstorage/backends/winery/roshard.py     | 13 +++++---
 swh/objstorage/backends/winery/settings.py    |  2 --
 swh/objstorage/backends/winery/throttler.py   | 30 ++++++++++++------
 .../tests/test_objstorage_winery.py           | 21 ++++++++++---
 5 files changed, 72 insertions(+), 25 deletions(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index dac41cc..3b98efe 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -47,13 +47,17 @@ class WineryObjStorage(ObjStorage):
             packer=(packer or {}),
         )
 
-        self.reader = WineryReader(**legacy_kwargs)
+        self.reader = WineryReader(
+            throttler_settings=self.settings["throttler"], **legacy_kwargs
+        )
 
         if readonly:
             self.writer = None
         else:
             self.writer = WineryWriter(
-                packer_settings=self.settings["packer"], **legacy_kwargs
+                packer_settings=self.settings["packer"],
+                throttler_settings=self.settings["throttler"],
+                **legacy_kwargs,
             )
 
     @timed
@@ -121,8 +125,9 @@ class WineryObjStorage(ObjStorage):
 
 
 class WineryReader:
-    def __init__(self, **kwargs):
+    def __init__(self, throttler_settings: settings.Throttler, **kwargs):
         self.args = kwargs
+        self.throttler_settings = throttler_settings
         self.base = SharedBase(**self.args)
         self.ro_shards: Dict[str, ROShard] = {}
         self.rw_shards: Dict[str, RWShard] = {}
@@ -138,7 +143,9 @@ class WineryReader:
     def roshard(self, name) -> Optional[ROShard]:
         if name not in self.ro_shards:
             try:
-                shard = ROShard(name, **self.args)
+                shard = ROShard(
+                    name=name, throttler_settings=self.throttler_settings, **self.args
+                )
             except ShardNotMapped:
                 return None
             self.ro_shards[name] = shard
@@ -170,12 +177,20 @@ class WineryReader:
         return content
 
 
-def pack(shard, packer_settings: settings.Packer, shared_base=None, **kwargs) -> bool:
+def pack(
+    shard,
+    packer_settings: settings.Packer,
+    throttler_settings: settings.Throttler,
+    shared_base=None,
+    **kwargs,
+) -> bool:
     rw = RWShard(shard, **kwargs)
 
     count = rw.count()
     logger.info("Creating RO shard %s for %s objects", shard, count)
-    with ROShardCreator(shard, count, **kwargs) as ro:
+    with ROShardCreator(
+        shard, count, throttler_settings=throttler_settings, **kwargs
+    ) as ro:
         logger.info("Created RO shard %s", shard)
         for i, (obj_id, content) in enumerate(rw.all()):
             ro.add(content, obj_id)
@@ -210,10 +225,12 @@ class WineryWriter:
     def __init__(
         self,
         packer_settings: settings.Packer,
+        throttler_settings: settings.Throttler,
         rwshard_idle_timeout: float = 300,
         **kwargs,
     ):
         self.packer_settings = packer_settings
+        self.throttler_settings = throttler_settings
         self.args = kwargs
         self.base = SharedBase(**self.args)
         self.shards_filled: List[str] = []
@@ -309,6 +326,7 @@ class WineryWriter:
             kwargs={
                 "shard": shard_name,
                 "packer_settings": self.packer_settings,
+                "throttler_settings": self.throttler_settings,
                 **self.args,
             },
         )
@@ -402,6 +420,7 @@ def shard_packer(
             ret = pack(
                 locked.name,
                 packer_settings=all_settings["packer"],
+                throttler_settings=all_settings["throttler"],
                 shared_base=base,
                 **legacy_kwargs,
             )
diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index e762d8a..3da1036 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -322,7 +322,7 @@ class Pool(object):
 
 
 class ROShard:
-    def __init__(self, name, **kwargs):
+    def __init__(self, name, throttler_settings, **kwargs):
         self.pool = Pool.from_kwargs(**kwargs)
         image_status = self.pool.image_mapped(name)
 
@@ -331,7 +331,9 @@ class ROShard:
                 f"RBD image for {name} isn't mapped{' read-only' if image_status=='rw' else ''}"
             )
 
-        self.throttler = Throttler(**kwargs)
+        self.throttler = Throttler(
+            throttler_settings=throttler_settings, db=kwargs["base_dsn"]
+        )
         self.name = name
         self.path = self.pool.image_path(self.name)
         self.shard = None
@@ -381,13 +383,14 @@ class ROShardCreator:
       rbd_wait_for_image: function called when waiting for a shard to be mapped
       shard_max_size: the size of the shard, passed to :class:`Pool`
       rbd_*: other RBD-related :class:`Pool` arguments
-      throttle_*: :class:`Throttler` arguments
+      throttler_settings: passed to the :class:`Throttler`
     """
 
     def __init__(
         self,
         name: str,
         count: int,
+        throttler_settings: settings.Throttler,
         rbd_create_images: bool = True,
         rbd_wait_for_image: Callable[[int], None] = sleep_exponential(
             min_duration=5,
@@ -398,7 +401,9 @@ class ROShardCreator:
         **kwargs,
     ):
         self.pool = Pool.from_kwargs(**kwargs)
-        self.throttler = Throttler(**kwargs)
+        self.throttler = Throttler(
+            throttler_settings=throttler_settings, db=kwargs["base_dsn"]
+        )
         self.name = name
         self.count = count
         self.path = self.pool.image_path(self.name)
diff --git a/swh/objstorage/backends/winery/settings.py b/swh/objstorage/backends/winery/settings.py
index 1e329dc..d042401 100644
--- a/swh/objstorage/backends/winery/settings.py
+++ b/swh/objstorage/backends/winery/settings.py
@@ -139,8 +139,6 @@ def populate_default_settings(
 
     if throttler is not None:
         settings["throttler"] = throttler
-        legacy_kwargs["throttle_read"] = throttler["max_read_bps"]
-        legacy_kwargs["throttle_write"] = throttler["max_write_bps"]
 
     if packer is not None:
         packer = packer_settings_with_defaults(packer)
diff --git a/swh/objstorage/backends/winery/throttler.py b/swh/objstorage/backends/winery/throttler.py
index 89f6259..a38b4f2 100644
--- a/swh/objstorage/backends/winery/throttler.py
+++ b/swh/objstorage/backends/winery/throttler.py
@@ -8,6 +8,7 @@ import datetime
 import logging
 import time
 
+from . import settings
 from .database import Database
 
 logger = logging.getLogger(__name__)
@@ -99,15 +100,18 @@ class IOThrottler(Database):
     bandwidth is shared equally between instances.
     """
 
-    def __init__(self, name, **kwargs):
-        super().__init__(
-            dsn=kwargs["base_dsn"],
-            application_name=kwargs.get("application_name", "SWH Winery Throttler"),
-        )
+    def __init__(
+        self,
+        name: str,
+        max_speed: int,
+        db: str,
+        application_name: str = "SWH Winery Throttler",
+    ):
+        super().__init__(dsn=db, application_name=application_name)
         self.name = name
         self.init_db()
         self.last_sync = 0
-        self.max_speed = kwargs["throttle_" + name]
+        self.max_speed = max_speed
         self.bucket = LeakyBucket(self.max_speed)
         self.bandwidth = BandwidthCalculator()
 
@@ -178,9 +182,17 @@ class Throttler:
     cumulated bandwidth reported by each Throttler instance.
     """
 
-    def __init__(self, **kwargs):
-        self.read = IOThrottler("read", **kwargs)
-        self.write = IOThrottler("write", **kwargs)
+    def __init__(self, throttler_settings: settings.Throttler, db):
+        self.read = IOThrottler(
+            name="read",
+            max_speed=throttler_settings["max_read_bps"],
+            db=db,
+        )
+        self.write = IOThrottler(
+            name="write",
+            max_speed=throttler_settings["max_write_bps"],
+            db=db,
+        )
 
     def throttle_get(self, fun, key):
         content = fun(key)
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index fd8573c..c2bbba7 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -497,7 +497,12 @@ def test_winery_pack(winery_settings, winery_writer, image_pool):
     winery_writer.base.set_shard_state(ShardState.FULL)
     winery_writer.base.shard_packing_starts(shard)
 
-    assert pack(shard, packer_settings=winery_settings["packer"], **winery_writer.args)
+    assert pack(
+        shard,
+        packer_settings=winery_settings["packer"],
+        throttler_settings=winery_settings["throttler"],
+        **winery_writer.args,
+    )
     assert winery_writer.base.get_shard_state(shard) == ShardState.PACKED
 
     assert cleanup_rw_shard(shard, **winery_writer.args)
@@ -1039,7 +1044,12 @@ def test_winery_get_object(winery_settings, winery_writer, winery_reader, image_
     winery_writer.base.set_shard_state(ShardState.FULL)
     winery_writer.base.shard_packing_starts(shard)
     assert (
-        pack(shard, packer_settings=winery_settings["packer"], **winery_writer.args)
+        pack(
+            shard,
+            packer_settings=winery_settings["packer"],
+            throttler_settings=winery_settings["throttler"],
+            **winery_writer.args,
+        )
         is True
     )
     assert winery_reader.get(sha256) == content
@@ -1170,7 +1180,7 @@ def test_winery_bandwidth_calculator(mocker):
 def test_winery_io_throttler(postgresql_dsn, mocker):
     sleep = mocker.spy(time, "sleep")
     speed = 100
-    i = IOThrottler("read", base_dsn=postgresql_dsn, throttle_read=100)
+    i = IOThrottler(name="read", db=postgresql_dsn, max_speed=100)
     count = speed
     i.add(count)
     sleep.assert_not_called()
@@ -1189,7 +1199,10 @@ def test_winery_io_throttler(postgresql_dsn, mocker):
 
 
 def test_winery_throttler(postgresql_dsn):
-    t = Throttler(base_dsn=postgresql_dsn, throttle_read=100, throttle_write=100)
+    t = Throttler(
+        db=postgresql_dsn,
+        throttler_settings={"max_write_bps": 100, "max_read_bps": 100},
+    )
 
     base = {}
     key = "KEY"
-- 
GitLab


From 7b37883452a0283be6190be616c55d5cd577660e Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Mon, 3 Mar 2025 18:09:16 +0100
Subject: [PATCH 11/25] winery: Share throttler between ROShards

There's no point in having multiple throttler instances for the
separate ROShards; it's even counterproductive, as each instance
creates its own connection pool, which in turn spawns many threads.
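
A minimal sketch of the sharing pattern introduced here, using
stand-in classes rather than the real winery API: the object storage
builds one Throttler and hands it to every ROShard, instead of each
ROShard building its own (and with it, its own connection pool).

    class Throttler:
        def __init__(self, db: str):
            self.db = db  # stands in for the per-instance connection pool

    class ROShard:
        def __init__(self, name: str, throttler: Throttler):
            self.name = name
            self.throttler = throttler  # reused, never re-created

    shared = Throttler(db="postgresql:///winery")  # made-up DSN
    shards = [ROShard(f"shard-{i}", throttler=shared) for i in range(3)]
    assert all(s.throttler is shared for s in shards)  # one pool for all shards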

Ref. swh/infra/sysadm-environment#5591
---
 swh/objstorage/backends/winery/objstorage.py | 20 ++++++++++----------
 swh/objstorage/backends/winery/roshard.py    | 14 +++++---------
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 3b98efe..b011448 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -18,6 +18,7 @@ from .roshard import Pool, ROShard, ROShardCreator, ShardNotMapped
 from .rwshard import RWShard
 from .sharedbase import ShardState, SharedBase
 from .sleep import sleep_exponential
+from .throttler import Throttler
 
 logger = logging.getLogger(__name__)
 
@@ -47,10 +48,12 @@ class WineryObjStorage(ObjStorage):
             packer=(packer or {}),
         )
 
-        self.reader = WineryReader(
-            throttler_settings=self.settings["throttler"], **legacy_kwargs
+        self.throttler = Throttler(
+            throttler_settings=self.settings["throttler"], db=database["db"]
         )
 
+        self.reader = WineryReader(throttler=self.throttler, **legacy_kwargs)
+
         if readonly:
             self.writer = None
         else:
@@ -125,9 +128,9 @@ class WineryObjStorage(ObjStorage):
 
 
 class WineryReader:
-    def __init__(self, throttler_settings: settings.Throttler, **kwargs):
+    def __init__(self, throttler: Throttler, **kwargs):
         self.args = kwargs
-        self.throttler_settings = throttler_settings
+        self.throttler = throttler
         self.base = SharedBase(**self.args)
         self.ro_shards: Dict[str, ROShard] = {}
         self.rw_shards: Dict[str, RWShard] = {}
@@ -143,9 +146,7 @@ class WineryReader:
     def roshard(self, name) -> Optional[ROShard]:
         if name not in self.ro_shards:
             try:
-                shard = ROShard(
-                    name=name, throttler_settings=self.throttler_settings, **self.args
-                )
+                shard = ROShard(name=name, throttler=self.throttler, **self.args)
             except ShardNotMapped:
                 return None
             self.ro_shards[name] = shard
@@ -188,9 +189,8 @@ def pack(
 
     count = rw.count()
     logger.info("Creating RO shard %s for %s objects", shard, count)
-    with ROShardCreator(
-        shard, count, throttler_settings=throttler_settings, **kwargs
-    ) as ro:
+    throttler = Throttler(throttler_settings=throttler_settings, db=kwargs["base_dsn"])
+    with ROShardCreator(shard, count, throttler=throttler, **kwargs) as ro:
         logger.info("Created RO shard %s", shard)
         for i, (obj_id, content) in enumerate(rw.all()):
             ro.add(content, obj_id)
diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index 3da1036..dbddd03 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -322,7 +322,7 @@ class Pool(object):
 
 
 class ROShard:
-    def __init__(self, name, throttler_settings, **kwargs):
+    def __init__(self, name, throttler, **kwargs):
         self.pool = Pool.from_kwargs(**kwargs)
         image_status = self.pool.image_mapped(name)
 
@@ -331,9 +331,7 @@ class ROShard:
                 f"RBD image for {name} isn't mapped{' read-only' if image_status=='rw' else ''}"
             )
 
-        self.throttler = Throttler(
-            throttler_settings=throttler_settings, db=kwargs["base_dsn"]
-        )
+        self.throttler = throttler
         self.name = name
         self.path = self.pool.image_path(self.name)
         self.shard = None
@@ -378,19 +376,19 @@ class ROShardCreator:
     Arguments:
       name: Name of the shard to be initialized
       count: Number of objects to provision in the shard
+      throttler: An instance of a winery throttler
       rbd_create_images: whether the ROShardCreator should create the rbd
         image, or delegate to the rbd_shard_manager
       rbd_wait_for_image: function called when waiting for a shard to be mapped
       shard_max_size: the size of the shard, passed to :class:`Pool`
       rbd_*: other RBD-related :class:`Pool` arguments
-      throttler_settings: passed to the :class:`Throttler`
     """
 
     def __init__(
         self,
         name: str,
         count: int,
-        throttler_settings: settings.Throttler,
+        throttler: Throttler,
         rbd_create_images: bool = True,
         rbd_wait_for_image: Callable[[int], None] = sleep_exponential(
             min_duration=5,
@@ -401,9 +399,7 @@ class ROShardCreator:
         **kwargs,
     ):
         self.pool = Pool.from_kwargs(**kwargs)
-        self.throttler = Throttler(
-            throttler_settings=throttler_settings, db=kwargs["base_dsn"]
-        )
+        self.throttler = throttler
         self.name = name
         self.count = count
         self.path = self.pool.image_path(self.name)
-- 
GitLab


From 54873546770e9dee3f145064b78cbdb5e81291a7 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Tue, 4 Mar 2025 16:12:45 +0100
Subject: [PATCH 12/25] winery rbd shard mapper: notify systemd at every new
 shard

This allows following the (slow) mapping process more closely.
---
 swh/objstorage/backends/winery/roshard.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index dbddd03..620193c 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -232,12 +232,6 @@ class Pool(object):
                 )
                 logger.debug("Mapped images: %s", Counter(mapped_images.values()))
 
-            notify(
-                f"STATUS="
-                "Enumerated {len(shards)} shards, "
-                f"mapped {len(mapped_images)} images"
-            )
-
             for shard_name, shard_state in shards:
                 mapped_state = mapped_images.get(shard_name)
                 if mapped_state == "ro":
@@ -308,6 +302,12 @@ class Pool(object):
                 else:
                     logger.debug("%s shard %s, skipping", shard_state.name, shard_name)
 
+                notify(
+                    "STATUS="
+                    f"Enumerated {len(shards)} shards, "
+                    f"mapped {len(mapped_images)} images"
+                )
+
             if not notified_systemd:
                 # The first iteration has happened, all known shards should be ready
                 notify("READY=1")
-- 
GitLab


From ece77d05c265fec1692737764e4a745fa996ffb4 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Tue, 4 Mar 2025 16:18:19 +0100
Subject: [PATCH 13/25] winery rbd shard mapper: allow acting only on a subset
 of shards

This will give us the option to map shards in parallel.
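
A small, self-contained sketch of how the prefix filter can partition
work between parallel mapper processes (the shard names and the
shards_for helper below are made up; the real filter lives in the
manager loop and is exposed through the --only-prefix CLI flag):

    from typing import List, Optional

    all_shards = ["shard-0a", "shard-0b", "shard-1c", "shard-1d"]

    def shards_for(only_prefix: Optional[str]) -> List[str]:
        # same condition as the manager loop: no prefix means "take all"
        return [
            s for s in all_shards
            if not only_prefix or s.startswith(only_prefix)
        ]

    # e.g. `swh objstorage winery rbd --only-prefix shard-0` and a second
    # process with `--only-prefix shard-1` act on disjoint subsets:
    assert shards_for("shard-0") == ["shard-0a", "shard-0b"]
    assert shards_for("shard-1") == ["shard-1c", "shard-1d"]
    assert shards_for(None) == all_shards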
---
 swh/objstorage/backends/winery/roshard.py     |  8 ++++++-
 swh/objstorage/cli.py                         |  7 +++++-
 .../tests/test_objstorage_winery.py           | 24 +++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index 620193c..fc12bfb 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -200,6 +200,7 @@ class Pool(object):
         manage_rw_images: bool,
         wait_for_image: Callable[[int], None],
         stop_running: Callable[[], bool],
+        only_prefix: Optional[str] = None,
         application_name: Optional[str] = None,
     ) -> None:
         """Manage RBD image creation and mapping automatically.
@@ -210,6 +211,7 @@ class Pool(object):
           wait_for_image: function which is called at each loop iteration, with
             an attempt number, if no images had to be mapped recently
           stop_running: callback that returns True when the manager should stop running
+          only_prefix: only map images with the given name prefix
           application_name: the application name sent to PostgreSQL
         """
         application_name = application_name or "Winery RBD image manager"
@@ -223,7 +225,11 @@ class Pool(object):
             did_something = False
             logger.debug("Listing shards")
             start = time.monotonic()
-            shards = list(base.list_shards())
+            shards = [
+                (shard_name, shard_state)
+                for shard_name, shard_state in base.list_shards()
+                if not only_prefix or shard_name.startswith(only_prefix)
+            ]
             if logger.isEnabledFor(logging.DEBUG):
                 logger.debug(
                     "Listed %d shards in %.02f seconds",
diff --git a/swh/objstorage/cli.py b/swh/objstorage/cli.py
index 7ab915c..b6677d1 100644
--- a/swh/objstorage/cli.py
+++ b/swh/objstorage/cli.py
@@ -147,9 +147,13 @@ def winery_packer(ctx, stop_after_shards: Optional[int] = None):
 @winery.command("rbd")
 @click.option("--stop-instead-of-waiting", is_flag=True)
 @click.option("--manage-rw-images", is_flag=True)
+@click.option("--only-prefix")
 @click.pass_context
 def winery_rbd(
-    ctx, stop_instead_of_waiting: bool = False, manage_rw_images: bool = True
+    ctx,
+    stop_instead_of_waiting: bool = False,
+    manage_rw_images: bool = True,
+    only_prefix: Optional[str] = None,
 ):
     """Run a winery RBD image manager process"""
     import signal
@@ -199,6 +203,7 @@ def winery_rbd(
         base_dsn=legacy_kwargs["base_dsn"],
         manage_rw_images=manage_rw_images,
         wait_for_image=wait_for_image,
+        only_prefix=only_prefix,
         stop_running=stop_running,
     )
 
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index c2bbba7..9f676c9 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -824,6 +824,30 @@ def test_winery_cli_rbd(image_pool, storage, tmp_path, winery_settings, cli_runn
     for shard in filled:
         assert image_pool.image_mapped(shard) is None
 
+    first_shard = filled[0]
+
+    result = cli_runner.invoke(
+        swh_cli_group,
+        (
+            "objstorage",
+            "winery",
+            "rbd",
+            "--stop-instead-of-waiting",
+            "--only-prefix",
+            first_shard[:10],
+            "--manage-rw-images",
+        ),
+        env={"SWH_CONFIG_FILENAME": str(tmp_path / "config.yml")},
+    )
+
+    assert result.exit_code == 0
+
+    for shard in filled:
+        if shard == first_shard:
+            assert image_pool.image_mapped(shard) == "rw"
+        else:
+            assert image_pool.image_mapped(shard) is None
+
     result = cli_runner.invoke(
         swh_cli_group,
         (
-- 
GitLab


From a8b2e45911c33f5ce3b17a08c4601b4b2d44d1a4 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Tue, 4 Mar 2025 16:19:01 +0100
Subject: [PATCH 14/25] winery rbd shard mapper: randomize shards

This reduces contention across parallel processes (if they act on
overlapping shards): a crash can happen if two rbd processes call `rbd
map` on the same shard at the same time.
---
 swh/objstorage/backends/winery/roshard.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index fc12bfb..35ede3a 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -7,6 +7,7 @@ from collections import Counter
 import logging
 import math
 import os
+import random
 import shlex
 import socket
 import stat
@@ -230,6 +231,7 @@ class Pool(object):
                 for shard_name, shard_state in base.list_shards()
                 if not only_prefix or shard_name.startswith(only_prefix)
             ]
+            random.shuffle(shards)
             if logger.isEnabledFor(logging.DEBUG):
                 logger.debug(
                     "Listed %d shards in %.02f seconds",
-- 
GitLab


From c156e5b5770a12edf21375f214a44256dcfb4567 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Wed, 5 Mar 2025 11:28:30 +0100
Subject: [PATCH 15/25] winery: make throttler optional

Extend tests to support instantiating winery with or without a
throttler.

Move the throttler database connection setting into the throttler
dict, so that it can be split from the main database connection
(e.g. on read-only workers).
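
A sketch of the two resulting configurations (names and DSNs are made
up). When the throttler dict omits "db", it inherits the main database
connection string; when the throttler entry is missing or None,
Throttler.from_settings() returns a no-op throttler:

    # throttling enabled, using a dedicated throttler database
    with_throttler = {
        "database": {"db": "postgresql:///winery"},
        "throttler": {
            "db": "postgresql:///winery-throttler",
            "max_read_bps": 100 * 1024 * 1024,
            "max_write_bps": 200 * 1024 * 1024,
        },
    }

    # throttling disabled, e.g. on a read-only worker
    without_throttler = {
        "database": {"db": "postgresql:///winery"},
        "throttler": None,
    }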
---
 conftest.py                                   |  4 ++
 swh/objstorage/backends/winery/objstorage.py  | 13 +++---
 swh/objstorage/backends/winery/settings.py    | 11 ++++-
 swh/objstorage/backends/winery/throttler.py   | 27 +++++++++++--
 .../tests/test_objstorage_winery.py           | 40 ++++++++++++++++---
 5 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/conftest.py b/conftest.py
index 5567392..7de7702 100644
--- a/conftest.py
+++ b/conftest.py
@@ -13,6 +13,10 @@ def pytest_configure(config):
         "clean_immediately(bool): whether the winery packer should clean rw "
         "shards immediately",
     )
+    config.addinivalue_line(
+        "markers",
+        "use_throttler(bool): whether the winery storage should use a throttler",
+    )
     config.addinivalue_line(
         "markers",
         (
diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index b011448..1aaf392 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -48,10 +48,7 @@ class WineryObjStorage(ObjStorage):
             packer=(packer or {}),
         )
 
-        self.throttler = Throttler(
-            throttler_settings=self.settings["throttler"], db=database["db"]
-        )
-
+        self.throttler = Throttler.from_settings(self.settings)
         self.reader = WineryReader(throttler=self.throttler, **legacy_kwargs)
 
         if readonly:
@@ -59,7 +56,7 @@ class WineryObjStorage(ObjStorage):
         else:
             self.writer = WineryWriter(
                 packer_settings=self.settings["packer"],
-                throttler_settings=self.settings["throttler"],
+                throttler_settings=self.settings.get("throttler"),
                 **legacy_kwargs,
             )
 
@@ -181,7 +178,7 @@ class WineryReader:
 def pack(
     shard,
     packer_settings: settings.Packer,
-    throttler_settings: settings.Throttler,
+    throttler_settings: Optional[settings.Throttler],
     shared_base=None,
     **kwargs,
 ) -> bool:
@@ -189,7 +186,7 @@ def pack(
 
     count = rw.count()
     logger.info("Creating RO shard %s for %s objects", shard, count)
-    throttler = Throttler(throttler_settings=throttler_settings, db=kwargs["base_dsn"])
+    throttler = Throttler.from_settings({"throttler": throttler_settings})
     with ROShardCreator(shard, count, throttler=throttler, **kwargs) as ro:
         logger.info("Created RO shard %s", shard)
         for i, (obj_id, content) in enumerate(rw.all()):
@@ -225,7 +222,7 @@ class WineryWriter:
     def __init__(
         self,
         packer_settings: settings.Packer,
-        throttler_settings: settings.Throttler,
+        throttler_settings: Optional[settings.Throttler],
         rwshard_idle_timeout: float = 300,
         **kwargs,
     ):
diff --git a/swh/objstorage/backends/winery/settings.py b/swh/objstorage/backends/winery/settings.py
index d042401..2bb59eb 100644
--- a/swh/objstorage/backends/winery/settings.py
+++ b/swh/objstorage/backends/winery/settings.py
@@ -73,8 +73,12 @@ def rbd_shards_pool_settings_with_defaults(
 class Throttler(TypedDict):
     """Settings for the winery throttler"""
 
+    db: NotRequired[str]
+    """Throttler database connection string"""
     max_read_bps: int
+    """Max read bytes per second"""
     max_write_bps: int
+    """Max write bytes per second"""
 
 
 class Database(TypedDict):
@@ -97,7 +101,7 @@ class Winery(TypedDict, total=False):
     database: Database
     shards: Shards
     shards_pool: RbdShardsPool
-    throttler: Throttler
+    throttler: Optional[Throttler]
     packer: Packer
 
 
@@ -138,7 +142,10 @@ def populate_default_settings(
             raise ValueError(f"Unknown shards pool type: {shards_pool['type']}")
 
     if throttler is not None:
-        settings["throttler"] = throttler
+        if "db" not in throttler:
+            settings["throttler"] = {"db": settings["database"]["db"], **throttler}
+        else:
+            settings["throttler"] = throttler
 
     if packer is not None:
         packer = packer_settings_with_defaults(packer)
diff --git a/swh/objstorage/backends/winery/throttler.py b/swh/objstorage/backends/winery/throttler.py
index a38b4f2..c336324 100644
--- a/swh/objstorage/backends/winery/throttler.py
+++ b/swh/objstorage/backends/winery/throttler.py
@@ -182,15 +182,23 @@ class Throttler:
     cumulated bandwidth reported by each Throttler instance.
     """
 
-    def __init__(self, throttler_settings: settings.Throttler, db):
+    @staticmethod
+    def from_settings(settings: settings.Winery) -> "Throttler":
+        """Return a throttler initialized from settings"""
+        if "throttler" in settings and settings["throttler"]:
+            return Throttler(**settings["throttler"])
+        else:
+            return NoopThrottler()
+
+    def __init__(self, db: str, max_read_bps: int, max_write_bps: int):
         self.read = IOThrottler(
             name="read",
-            max_speed=throttler_settings["max_read_bps"],
+            max_speed=max_read_bps,
             db=db,
         )
         self.write = IOThrottler(
             name="write",
-            max_speed=throttler_settings["max_write_bps"],
+            max_speed=max_write_bps,
             db=db,
         )
 
@@ -202,3 +210,16 @@ class Throttler:
     def throttle_add(self, fun, obj_id, content):
         self.write.add(len(obj_id) + len(content))
         return fun(obj_id, content)
+
+
+class NoopThrottler(Throttler):
+    """A throttler that does nothing"""
+
+    def __init__(self):
+        pass
+
+    def throttle_get(self, fun, key):
+        return fun(key)
+
+    def throttle_add(self, fun, obj_id, content):
+        return fun(obj_id, content)
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index 9f676c9..5af8d70 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -195,6 +195,15 @@ def clean_immediately(request) -> bool:
         return marker.args[0]
 
 
+@pytest.fixture
+def use_throttler(request) -> bool:
+    marker = request.node.get_closest_marker("use_throttler")
+    if marker is None:
+        return True
+    else:
+        return marker.args[0]
+
+
 @pytest.fixture
 def winery_settings(
     postgresql_dsn,
@@ -203,14 +212,20 @@ def winery_settings(
     clean_immediately,
     rbd_pool_name,
     rbd_map_options,
+    use_throttler,
 ) -> settings.Winery:
     return dict(
         shards={"max_size": shard_max_size},
         database={"db": postgresql_dsn},
-        throttler={
-            "max_write_bps": 200 * 1024 * 1024,
-            "max_read_bps": 100 * 1024 * 1024,
-        },
+        throttler=(
+            {
+                "db": postgresql_dsn,
+                "max_write_bps": 200 * 1024 * 1024,
+                "max_read_bps": 100 * 1024 * 1024,
+            }
+            if use_throttler
+            else None
+        ),
         packer={
             "pack_immediately": pack_immediately,
             "clean_immediately": clean_immediately,
@@ -291,6 +306,13 @@ def test_winery_add_get(winery_writer, winery_reader):
     winery_writer.shard.drop()
 
 
+@pytest.mark.parametrize(
+    (),
+    [
+        pytest.param(marks=pytest.mark.use_throttler(False), id="throttler=False"),
+        pytest.param(marks=pytest.mark.use_throttler(True), id="throttler=True"),
+    ],
+)
 def test_winery_add_concurrent(winery_settings, mocker):
     num_threads = 4
 
@@ -341,6 +363,13 @@ def test_winery_add_concurrent(winery_settings, mocker):
 
 
 @pytest.mark.shard_max_size(1)
+@pytest.mark.parametrize(
+    (),
+    [
+        pytest.param(marks=pytest.mark.use_throttler(False), id="throttler=False"),
+        pytest.param(marks=pytest.mark.use_throttler(True), id="throttler=True"),
+    ],
+)
 def test_winery_add_and_pack(winery_writer, mocker):
     mocker.patch("swh.objstorage.backends.winery.objstorage.pack", return_value=True)
     shard = winery_writer.base.locked_shard
@@ -1225,7 +1254,8 @@ def test_winery_io_throttler(postgresql_dsn, mocker):
 def test_winery_throttler(postgresql_dsn):
     t = Throttler(
         db=postgresql_dsn,
-        throttler_settings={"max_write_bps": 100, "max_read_bps": 100},
+        max_write_bps=100,
+        max_read_bps=100,
     )
 
     base = {}
-- 
GitLab


From 914a22592539aa65e4353dc5b8d5c39906efa5bc Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Wed, 5 Mar 2025 16:10:57 +0100
Subject: [PATCH 16/25] winery: move RBD settings to the new settings framework

---
 swh/objstorage/backends/winery/objstorage.py | 87 ++++++++++++++------
 swh/objstorage/backends/winery/roshard.py    | 26 +++++-
 swh/objstorage/backends/winery/settings.py   | 12 ++-
 3 files changed, 92 insertions(+), 33 deletions(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 1aaf392..19ba612 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -13,8 +13,7 @@ from swh.objstorage.exc import ObjNotFoundError, ReadOnlyObjStorageError
 from swh.objstorage.interface import CompositeObjId, ObjId
 from swh.objstorage.objstorage import ObjStorage, timed
 
-from . import settings
-from .roshard import Pool, ROShard, ROShardCreator, ShardNotMapped
+from . import roshard, settings
 from .rwshard import RWShard
 from .sharedbase import ShardState, SharedBase
 from .sleep import sleep_exponential
@@ -49,7 +48,13 @@ class WineryObjStorage(ObjStorage):
         )
 
         self.throttler = Throttler.from_settings(self.settings)
-        self.reader = WineryReader(throttler=self.throttler, **legacy_kwargs)
+        self.pool = roshard.pool_from_settings(
+            shards_settings=self.settings["shards"],
+            shards_pool_settings=self.settings["shards_pool"],
+        )
+        self.reader = WineryReader(
+            throttler=self.throttler, pool=self.pool, **legacy_kwargs
+        )
 
         if readonly:
             self.writer = None
@@ -57,6 +62,8 @@ class WineryObjStorage(ObjStorage):
             self.writer = WineryWriter(
                 packer_settings=self.settings["packer"],
                 throttler_settings=self.settings.get("throttler"),
+                shards_settings=self.settings["shards"],
+                shards_pool_settings=self.settings["shards_pool"],
                 **legacy_kwargs,
             )
 
@@ -125,11 +132,13 @@ class WineryObjStorage(ObjStorage):
 
 
 class WineryReader:
-    def __init__(self, throttler: Throttler, **kwargs):
+    def __init__(self, throttler: Throttler, pool: roshard.Pool, **kwargs):
         self.args = kwargs
         self.throttler = throttler
+        self.pool = pool
+        self.base_dsn = self.args["base_dsn"]
         self.base = SharedBase(**self.args)
-        self.ro_shards: Dict[str, ROShard] = {}
+        self.ro_shards: Dict[str, roshard.ROShard] = {}
         self.rw_shards: Dict[str, RWShard] = {}
 
     def __contains__(self, obj_id):
@@ -140,11 +149,13 @@ class WineryReader:
     ) -> Iterator[bytes]:
         yield from self.base.list_signatures(after_id, limit)
 
-    def roshard(self, name) -> Optional[ROShard]:
+    def roshard(self, name) -> Optional[roshard.ROShard]:
         if name not in self.ro_shards:
             try:
-                shard = ROShard(name=name, throttler=self.throttler, **self.args)
-            except ShardNotMapped:
+                shard = roshard.ROShard(
+                    name=name, throttler=self.throttler, pool=self.pool, **self.args
+                )
+            except roshard.ShardNotMapped:
                 return None
             self.ro_shards[name] = shard
             if name in self.rw_shards:
@@ -153,7 +164,7 @@ class WineryReader:
 
     def rwshard(self, name) -> RWShard:
         if name not in self.rw_shards:
-            shard = RWShard(name, **self.args)
+            shard = RWShard(name, shard_max_size=0, base_dsn=self.base_dsn)
             self.rw_shards[name] = shard
         return self.rw_shards[name]
 
@@ -176,18 +187,29 @@ class WineryReader:
 
 
 def pack(
-    shard,
+    shard: str,
+    base_dsn: str,
     packer_settings: settings.Packer,
     throttler_settings: Optional[settings.Throttler],
-    shared_base=None,
-    **kwargs,
+    shards_settings: settings.Shards,
+    shards_pool_settings: settings.RbdShardsPool,
+    shared_base: Optional[SharedBase] = None,
 ) -> bool:
-    rw = RWShard(shard, **kwargs)
+    rw = RWShard(shard, shard_max_size=shards_settings["max_size"], base_dsn=base_dsn)
 
     count = rw.count()
     logger.info("Creating RO shard %s for %s objects", shard, count)
     throttler = Throttler.from_settings({"throttler": throttler_settings})
-    with ROShardCreator(shard, count, throttler=throttler, **kwargs) as ro:
+    pool = roshard.pool_from_settings(
+        shards_settings=shards_settings, shards_pool_settings=shards_pool_settings
+    )
+    with roshard.ROShardCreator(
+        name=shard,
+        count=count,
+        throttler=throttler,
+        pool=pool,
+        rbd_create_images=packer_settings["create_images"],
+    ) as ro:
         logger.info("Created RO shard %s", shard)
         for i, (obj_id, content) in enumerate(rw.all()):
             ro.add(content, obj_id)
@@ -199,20 +221,22 @@ def pack(
     logger.info("RO shard %s: saved", shard)
 
     if not shared_base:
-        shared_base = SharedBase(**kwargs)
+        shared_base = SharedBase(base_dsn=base_dsn)
     shared_base.shard_packing_ends(shard)
     if packer_settings["clean_immediately"]:
-        cleanup_rw_shard(shard, shared_base=shared_base, **kwargs)
+        cleanup_rw_shard(shard, shared_base=shared_base)
     return True
 
 
-def cleanup_rw_shard(shard, shared_base=None, **kwargs) -> bool:
-    rw = RWShard(shard, **{"shard_max_size": 0, **kwargs})
+def cleanup_rw_shard(shard, base_dsn=None, shared_base=None) -> bool:
+    if shared_base is not None and not base_dsn:
+        base_dsn = shared_base.dsn
+    rw = RWShard(name=shard, shard_max_size=0, base_dsn=base_dsn)
 
     rw.drop()
 
     if not shared_base:
-        shared_base = SharedBase(**kwargs)
+        shared_base = SharedBase(base_dsn=base_dsn)
     shared_base.set_shard_state(name=shard, new_state=ShardState.READONLY)
 
     return True
@@ -223,11 +247,15 @@ class WineryWriter:
         self,
         packer_settings: settings.Packer,
         throttler_settings: Optional[settings.Throttler],
+        shards_settings: settings.Shards,
+        shards_pool_settings: settings.RbdShardsPool,
         rwshard_idle_timeout: float = 300,
         **kwargs,
     ):
         self.packer_settings = packer_settings
         self.throttler_settings = throttler_settings
+        self.shards_settings = shards_settings
+        self.shards_pool_settings = shards_pool_settings
         self.args = kwargs
         self.base = SharedBase(**self.args)
         self.shards_filled: List[str] = []
@@ -261,10 +289,11 @@ class WineryWriter:
         """Lock a shard to be able to use it. Release it after :attr:`idle_timeout`."""
         if not self._shard:
             self._shard = RWShard(
-                self.base.locked_shard,
+                name=self.base.locked_shard,
+                base_dsn=self.args["base_dsn"],
+                shard_max_size=self.shards_settings["max_size"],
                 idle_timeout_cb=partial(self.release_shard, from_idle_handler=True),
                 idle_timeout=self.idle_timeout,
-                **self.args,
             )
             logger.debug(
                 "WineryBase: locked RWShard %s, releasing it in %s",
@@ -297,7 +326,7 @@ class WineryWriter:
         # We only care about RWShard for now. ROShards will be
         # taken care in a batch job.
         if not state.image_available:
-            rwshard = RWShard(name, **self.args)
+            rwshard = RWShard(name, shard_max_size=0, base_dsn=self.base.dsn)
             try:
                 rwshard.delete(obj_id)
             except KeyError:
@@ -322,9 +351,11 @@ class WineryWriter:
             target=pack,
             kwargs={
                 "shard": shard_name,
+                "base_dsn": self.base.dsn,
                 "packer_settings": self.packer_settings,
                 "throttler_settings": self.throttler_settings,
-                **self.args,
+                "shards_settings": self.shards_settings,
+                "shards_pool_settings": self.shards_pool_settings,
             },
         )
         p.start()
@@ -413,11 +444,15 @@ def shard_packer(
         waited_for_shards = 0
 
         with locked:
+            if locked.name is None:
+                raise RuntimeError("No shard has been locked?")
             logger.info("shard_packer: Locked shard %s to pack", locked.name)
             ret = pack(
-                locked.name,
+                shard=locked.name,
                 packer_settings=all_settings["packer"],
                 throttler_settings=all_settings["throttler"],
+                shards_settings=all_settings["shards"],
+                shards_pool_settings=all_settings["shards_pool"],
                 shared_base=base,
                 **legacy_kwargs,
             )
@@ -488,7 +523,7 @@ def rw_shard_cleaner(
 
 def deleted_objects_cleaner(
     base: SharedBase,
-    pool: Pool,
+    pool: roshard.Pool,
     stop_running: Callable[[], bool],
 ):
     """Clean up deleted objects from RO shards and the shared database.
@@ -507,7 +542,7 @@ def deleted_objects_cleaner(
         if stop_running():
             break
         if shard_state.readonly:
-            ROShard.delete(pool, shard_name, obj_id)
+            roshard.ROShard.delete(pool, shard_name, obj_id)
         base.clean_deleted_object(obj_id)
         count += 1
 
diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index 35ede3a..074cf50 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -329,9 +329,28 @@ class Pool(object):
                 attempt += 1
 
 
+def pool_from_settings(
+    shards_settings: settings.Shards, shards_pool_settings: settings.RbdShardsPool
+) -> Pool:
+    pool_type = shards_pool_settings["type"]
+    if pool_type == "rbd":
+        return Pool(
+            shard_max_size=shards_settings["max_size"],
+            rbd_use_sudo=shards_pool_settings["use_sudo"],
+            rbd_pool_name=shards_pool_settings["pool_name"],
+            rbd_data_pool_name=shards_pool_settings["data_pool_name"],
+            rbd_image_features_unsupported=shards_pool_settings[
+                "image_features_unsupported"
+            ],
+            rbd_map_options=shards_pool_settings["map_options"],
+        )
+    else:
+        raise ValueError(f"Unknown shards pool type: {pool_type}")
+
+
 class ROShard:
-    def __init__(self, name, throttler, **kwargs):
-        self.pool = Pool.from_kwargs(**kwargs)
+    def __init__(self, name, throttler, pool, **kwargs):
+        self.pool = pool
         image_status = self.pool.image_mapped(name)
 
         if image_status != "ro":
@@ -397,6 +416,7 @@ class ROShardCreator:
         name: str,
         count: int,
         throttler: Throttler,
+        pool: Pool,
         rbd_create_images: bool = True,
         rbd_wait_for_image: Callable[[int], None] = sleep_exponential(
             min_duration=5,
@@ -406,7 +426,7 @@ class ROShardCreator:
         ),
         **kwargs,
     ):
-        self.pool = Pool.from_kwargs(**kwargs)
+        self.pool = pool
         self.throttler = throttler
         self.name = name
         self.count = count
diff --git a/swh/objstorage/backends/winery/settings.py b/swh/objstorage/backends/winery/settings.py
index 2bb59eb..47737a8 100644
--- a/swh/objstorage/backends/winery/settings.py
+++ b/swh/objstorage/backends/winery/settings.py
@@ -13,6 +13,8 @@ DEFAULT_IMAGE_FEATURES_UNSUPPORTED: Tuple[str, ...] = ()
 class Packer(TypedDict):
     """Settings for the packer process, either external or internal"""
 
+    create_images: NotRequired[bool]
+    """Whether to create the images"""
     pack_immediately: NotRequired[bool]
     """Immediately pack shards (in a separate thread) when overflowing"""
     clean_immediately: NotRequired[bool]
@@ -21,7 +23,12 @@ class Packer(TypedDict):
 
 def packer_settings_with_defaults(values: Packer) -> Packer:
     """Hydrate Packer settings with default values"""
-    return {"pack_immediately": True, "clean_immediately": True, **values}
+    return {
+        "create_images": True,
+        "pack_immediately": True,
+        "clean_immediately": True,
+        **values,
+    }
 
 
 class Shards(TypedDict):
@@ -129,15 +136,12 @@ def populate_default_settings(
     if shards is not None:
         shards = shards_settings_with_defaults(shards)
         settings["shards"] = shards
-        legacy_kwargs["shard_max_size"] = shards["max_size"]
         legacy_kwargs["rwshard_idle_timeout"] = shards["rw_idle_timeout"]
 
     if shards_pool is not None:
         if shards_pool["type"] == "rbd":
             shards_pool = rbd_shards_pool_settings_with_defaults(shards_pool)
             settings["shards_pool"] = shards_pool
-            for k, v in shards_pool.items():
-                legacy_kwargs[f"rbd_{k}"] = v
         else:
             raise ValueError(f"Unknown shards pool type: {shards_pool['type']}")
 
-- 
GitLab


From ec5f9e056aebc68f36fefc3b4a2a890beb16a25e Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Wed, 5 Mar 2025 16:16:11 +0100
Subject: [PATCH 17/25] winery: make RBD shard manager a function rather than a
 method of the pool

This should make it easier to substitute the implementation.
---
 swh/objstorage/backends/winery/roshard.py | 299 +++++++++++-----------
 swh/objstorage/cli.py                     |   5 +-
 2 files changed, 152 insertions(+), 152 deletions(-)

diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index 074cf50..c4f719c 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -170,163 +170,162 @@ class Pool(object):
                 else:
                     raise
 
-    @staticmethod
-    def record_shard_mapped(base: SharedBase, shard_name: str):
-        """Record a shard as mapped, bailing out after a few attempts.
 
-        Multiple attempts are used to handle a race condition when two hosts
-        attempt to record the shard as mapped at the same time. In this
-        situation, one of the two hosts will succeed and the other one will
-        fail, the sleep delay can be kept short and linear.
+def record_shard_mapped(base: SharedBase, shard_name: str):
+    """Record a shard as mapped, bailing out after a few attempts.
 
-        """
-        outer_exc = None
-        for attempt in range(5):
-            try:
-                base.record_shard_mapped(host=socket.gethostname(), name=shard_name)
-                break
-            except Exception as exc:
-                outer_exc = exc
-                logger.warning(
-                    "Failed to mark shard %s as mapped, retrying...", shard_name
-                )
-                time.sleep(attempt + 1)
-        else:
-            assert outer_exc is not None
-            raise outer_exc
+    Multiple attempts are used to handle a race condition when two hosts
+    attempt to record the shard as mapped at the same time. In this
+    situation, one of the two hosts will succeed and the other one will
+    fail, the sleep delay can be kept short and linear.
 
-    def manage_images(
-        self,
-        base_dsn: str,
-        manage_rw_images: bool,
-        wait_for_image: Callable[[int], None],
-        stop_running: Callable[[], bool],
-        only_prefix: Optional[str] = None,
-        application_name: Optional[str] = None,
-    ) -> None:
-        """Manage RBD image creation and mapping automatically.
-
-        Arguments:
-          base_dsn: the DSN of the connection to the SharedBase
-          manage_rw_images: whether RW images should be created and mapped
-          wait_for_image: function which is called at each loop iteration, with
-            an attempt number, if no images had to be mapped recently
-          stop_running: callback that returns True when the manager should stop running
-          only_prefix: only map images with the given name prefix
-          application_name: the application name sent to PostgreSQL
-        """
-        application_name = application_name or "Winery RBD image manager"
-        base = SharedBase(base_dsn=base_dsn, application_name=application_name)
-
-        mapped_images: Dict[str, Literal["ro", "rw"]] = {}
-
-        attempt = 0
-        notified_systemd = False
-        while not stop_running():
-            did_something = False
-            logger.debug("Listing shards")
-            start = time.monotonic()
-            shards = [
-                (shard_name, shard_state)
-                for shard_name, shard_state in base.list_shards()
-                if not only_prefix or shard_name.startswith(only_prefix)
-            ]
-            random.shuffle(shards)
-            if logger.isEnabledFor(logging.DEBUG):
-                logger.debug(
-                    "Listed %d shards in %.02f seconds",
-                    len(shards),
-                    time.monotonic() - start,
-                )
-                logger.debug("Mapped images: %s", Counter(mapped_images.values()))
-
-            for shard_name, shard_state in shards:
-                mapped_state = mapped_images.get(shard_name)
-                if mapped_state == "ro":
-                    if shard_state == ShardState.PACKED:
-                        self.record_shard_mapped(base, shard_name)
-                    continue
-                elif shard_state.image_available:
-                    check_mapped = self.image_mapped(shard_name)
-                    if check_mapped == "ro":
-                        logger.debug(
-                            "Detected %s shard %s, already mapped read-only",
-                            shard_state.name,
-                            shard_name,
-                        )
-                    elif check_mapped == "rw":
-                        logger.info(
-                            "Detected %s shard %s, remapping read-only",
-                            shard_state.name,
-                            shard_name,
-                        )
-                        self.image_remap_ro(shard_name)
-                        attempt = 0
-                        while self.image_mapped(shard_name) != "ro":
-                            attempt += 1
-                            time.sleep(0.1)
-                            if attempt % 100 == 0:
-                                logger.warning(
-                                    "Waiting for %s shard %s to be remapped "
-                                    "read-only (for %ds)",
-                                    shard_state.name,
-                                    shard_name,
-                                    attempt / 10,
-                                )
-                        self.record_shard_mapped(base, shard_name)
-                        did_something = True
-                    else:
-                        logger.debug(
-                            "Detected %s shard %s, mapping read-only",
-                            shard_state.name,
-                            shard_name,
-                        )
-                        self.image_map(shard_name, options="ro")
-                        self.record_shard_mapped(base, shard_name)
-                        did_something = True
-                    mapped_images[shard_name] = "ro"
-                elif manage_rw_images:
-                    if os.path.exists(self.image_path(shard_name)):
-                        # Image already mapped, nothing to do
-                        pass
-                    elif not self.image_exists(shard_name):
-                        logger.info(
-                            "Detected %s shard %s, creating RBD image",
-                            shard_state.name,
-                            shard_name,
-                        )
-                        self.image_create(shard_name)
-                        did_something = True
-                    else:
-                        logger.warn(
-                            "Detected %s shard %s and RBD image exists, mapping read-write",
-                            shard_state.name,
-                            shard_name,
-                        )
-                        self.image_map(shard_name, "rw")
-                        did_something = True
-                    # Now the shard is mapped
-                    mapped_images[shard_name] = "rw"
-                else:
-                    logger.debug("%s shard %s, skipping", shard_state.name, shard_name)
+    """
+    outer_exc = None
+    for attempt in range(5):
+        try:
+            base.record_shard_mapped(host=socket.gethostname(), name=shard_name)
+            break
+        except Exception as exc:
+            outer_exc = exc
+            logger.warning("Failed to mark shard %s as mapped, retrying...", shard_name)
+            time.sleep(attempt + 1)
+    else:
+        assert outer_exc is not None
+        raise outer_exc
 
-                notify(
-                    "STATUS="
-                    f"Enumerated {len(shards)} shards, "
-                    f"mapped {len(mapped_images)} images"
-                )
 
-            if not notified_systemd:
-                # The first iteration has happened, all known shards should be ready
-                notify("READY=1")
-                notified_systemd = True
+def manage_images(
+    pool: Pool,
+    base_dsn: str,
+    manage_rw_images: bool,
+    wait_for_image: Callable[[int], None],
+    stop_running: Callable[[], bool],
+    only_prefix: Optional[str] = None,
+    application_name: Optional[str] = None,
+) -> None:
+    """Manage RBD image creation and mapping automatically.
 
-            if did_something:
-                attempt = 0
+    Arguments:
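+      pool: the shards pool (RBD- or directory-backed) used to create and map images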
+      base_dsn: the DSN of the connection to the SharedBase
+      manage_rw_images: whether RW images should be created and mapped
+      wait_for_image: sleep function called at each loop iteration, with an
+        attempt number, when no images had to be mapped recently
+      stop_running: callback that returns True when the manager should stop running
+      only_prefix: only map images with the given name prefix
+      application_name: the application name sent to PostgreSQL
+    """
+    application_name = application_name or "Winery RBD image manager"
+    base = SharedBase(base_dsn=base_dsn, application_name=application_name)
+
+    mapped_images: Dict[str, Literal["ro", "rw"]] = {}
+
+    attempt = 0
+    notified_systemd = False
+    while not stop_running():
+        did_something = False
+        logger.debug("Listing shards")
+        start = time.monotonic()
+        shards = [
+            (shard_name, shard_state)
+            for shard_name, shard_state in base.list_shards()
+            if not only_prefix or shard_name.startswith(only_prefix)
+        ]
+        random.shuffle(shards)
+        if logger.isEnabledFor(logging.DEBUG):
+            logger.debug(
+                "Listed %d shards in %.02f seconds",
+                len(shards),
+                time.monotonic() - start,
+            )
+            logger.debug("Mapped images: %s", Counter(mapped_images.values()))
+
+        for shard_name, shard_state in shards:
+            mapped_state = mapped_images.get(shard_name)
+            if mapped_state == "ro":
+                if shard_state == ShardState.PACKED:
+                    record_shard_mapped(base, shard_name)
+                continue
+            elif shard_state.image_available:
+                check_mapped = pool.image_mapped(shard_name)
+                if check_mapped == "ro":
+                    logger.debug(
+                        "Detected %s shard %s, already mapped read-only",
+                        shard_state.name,
+                        shard_name,
+                    )
+                elif check_mapped == "rw":
+                    logger.info(
+                        "Detected %s shard %s, remapping read-only",
+                        shard_state.name,
+                        shard_name,
+                    )
+                    pool.image_remap_ro(shard_name)
+                    attempt = 0
+                    while pool.image_mapped(shard_name) != "ro":
+                        attempt += 1
+                        time.sleep(0.1)
+                        if attempt % 100 == 0:
+                            logger.warning(
+                                "Waiting for %s shard %s to be remapped "
+                                "read-only (for %ds)",
+                                shard_state.name,
+                                shard_name,
+                                attempt / 10,
+                            )
+                    record_shard_mapped(base, shard_name)
+                    did_something = True
+                else:
+                    logger.debug(
+                        "Detected %s shard %s, mapping read-only",
+                        shard_state.name,
+                        shard_name,
+                    )
+                    pool.image_map(shard_name, options="ro")
+                    record_shard_mapped(base, shard_name)
+                    did_something = True
+                mapped_images[shard_name] = "ro"
+            elif manage_rw_images:
+                if os.path.exists(pool.image_path(shard_name)):
+                    # Image already mapped, nothing to do
+                    pass
+                elif not pool.image_exists(shard_name):
+                    logger.info(
+                        "Detected %s shard %s, creating RBD image",
+                        shard_state.name,
+                        shard_name,
+                    )
+                    pool.image_create(shard_name)
+                    did_something = True
+                else:
+                    logger.warn(
+                        "Detected %s shard %s and RBD image exists, mapping read-write",
+                        shard_state.name,
+                        shard_name,
+                    )
+                    pool.image_map(shard_name, "rw")
+                    did_something = True
+                # Now the shard is mapped
+                mapped_images[shard_name] = "rw"
             else:
-                # Sleep using the current value
-                wait_for_image(attempt)
-                attempt += 1
+                logger.debug("%s shard %s, skipping", shard_state.name, shard_name)
+
+            notify(
+                "STATUS="
+                f"Enumerated {len(shards)} shards, "
+                f"mapped {len(mapped_images)} images"
+            )
+
+        if not notified_systemd:
+            # The first iteration has happened, all known shards should be ready
+            notify("READY=1")
+            notified_systemd = True
+
+        if did_something:
+            attempt = 0
+        else:
+            # Sleep using the current value
+            wait_for_image(attempt)
+            attempt += 1
 
 
 def pool_from_settings(
diff --git a/swh/objstorage/cli.py b/swh/objstorage/cli.py
index b6677d1..d3396ae 100644
--- a/swh/objstorage/cli.py
+++ b/swh/objstorage/cli.py
@@ -158,7 +158,7 @@ def winery_rbd(
     """Run a winery RBD image manager process"""
     import signal
 
-    from swh.objstorage.backends.winery.roshard import Pool
+    from swh.objstorage.backends.winery.roshard import Pool, manage_images
     from swh.objstorage.backends.winery.sleep import sleep_exponential
 
     legacy_kwargs = ctx.obj["winery_legacy_kwargs"]
@@ -199,7 +199,8 @@ def winery_rbd(
         rbd_map_options=legacy_kwargs["rbd_map_options"],
     )
 
-    pool.manage_images(
+    manage_images(
+        pool=pool,
         base_dsn=legacy_kwargs["base_dsn"],
         manage_rw_images=manage_rw_images,
         wait_for_image=wait_for_image,
-- 
GitLab


From f7927dae034f3271a5a5702fcec5e38dafcde1d6 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Wed, 5 Mar 2025 18:48:11 +0100
Subject: [PATCH 18/25] winery: Make the directory-based shards pool usable

---
 swh/objstorage/backends/winery/objstorage.py  |   6 +-
 swh/objstorage/backends/winery/roshard.py     | 171 ++++++++++++++----
 swh/objstorage/backends/winery/settings.py    |  36 +++-
 swh/objstorage/cli.py                         |  31 ++--
 .../tests/test_objstorage_winery.py           |  64 ++++---
 .../tests/winery_testing_helpers.py           |  77 +-------
 6 files changed, 227 insertions(+), 158 deletions(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 19ba612..32ad08f 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -192,7 +192,7 @@ def pack(
     packer_settings: settings.Packer,
     throttler_settings: Optional[settings.Throttler],
     shards_settings: settings.Shards,
-    shards_pool_settings: settings.RbdShardsPool,
+    shards_pool_settings: settings.ShardsPool,
     shared_base: Optional[SharedBase] = None,
 ) -> bool:
     rw = RWShard(shard, shard_max_size=shards_settings["max_size"], base_dsn=base_dsn)
@@ -248,7 +248,7 @@ class WineryWriter:
         packer_settings: settings.Packer,
         throttler_settings: Optional[settings.Throttler],
         shards_settings: settings.Shards,
-        shards_pool_settings: settings.RbdShardsPool,
+        shards_pool_settings: settings.ShardsPool,
         rwshard_idle_timeout: float = 300,
         **kwargs,
     ):
@@ -449,12 +449,12 @@ def shard_packer(
             logger.info("shard_packer: Locked shard %s to pack", locked.name)
             ret = pack(
                 shard=locked.name,
+                base_dsn=all_settings["database"]["db"],
                 packer_settings=all_settings["packer"],
                 throttler_settings=all_settings["throttler"],
                 shards_settings=all_settings["shards"],
                 shards_pool_settings=all_settings["shards_pool"],
                 shared_base=base,
-                **legacy_kwargs,
             )
             if not ret:
                 raise ValueError("Packing shard %s failed" % locked.name)
diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index c4f719c..28e53ac 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -7,6 +7,7 @@ from collections import Counter
 import logging
 import math
 import os
+from pathlib import Path
 import random
 import shlex
 import socket
@@ -14,7 +15,17 @@ import stat
 import subprocess
 import time
 from types import TracebackType
-from typing import Callable, Dict, Iterable, Literal, Optional, Tuple, Type
+from typing import (
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Literal,
+    Optional,
+    Protocol,
+    Tuple,
+    Type,
+)
 
 from systemd.daemon import notify
 
@@ -32,7 +43,46 @@ class ShardNotMapped(Exception):
     pass
 
 
-class Pool(object):
+class Pool(Protocol):
+    def image_exists(self, image: str) -> bool:
+        """Check whether the named image exists (it does not have to be mapped)"""
+        ...
+
+    def image_mapped(self, image: str) -> Optional[Literal["ro", "rw"]]:
+        """Check whether the image is already mapped, read-only or read-write"""
+        try:
+            image_stat = os.stat(self.image_path(image))
+        except FileNotFoundError:
+            return None
+        return "rw" if (image_stat.st_mode & 0o222) != 0 else "ro"
+
+    def image_list(self) -> List[str]:
+        """List all known images, mapped or not"""
+        ...
+
+    def image_path(self, image: str) -> str:
+        """Return a path to the image, that can be opened with :func:`open`."""
+        ...
+
+    def image_create(self, image: str) -> None:
+        """Create a new image named `image` and allocate the right amount of space."""
+        ...
+
+    def image_map(self, image: str, options: str) -> None:
+        """Map an image for use. Options can be `"ro"` to map the image read-only, or
+        `"rw"` to map the image read-write."""
+        ...
+
+    def image_unmap(self, image: str) -> None:
+        """Unmap the image. Once this is done, the image is unavailable for use."""
+        ...
+
+    def image_remap_ro(self, image: str):
+        self.image_unmap(image)
+        self.image_map(image, "ro")
+
+
+class RBDPool(Pool):
     """Manage a Ceph RBD pool for Winery shards.
 
     Arguments:
@@ -75,7 +125,7 @@ class Pool(object):
     )
 
     @classmethod
-    def from_kwargs(cls, **kwargs) -> "Pool":
+    def from_kwargs(cls, **kwargs) -> "RBDPool":
         """Create a Pool from a set of arbitrary keyword arguments"""
         return cls(**{k: kwargs[k] for k in cls.POOL_CONFIG if k in kwargs})
 
@@ -108,14 +158,6 @@ class Pool(object):
         else:
             return True
 
-    def image_mapped(self, image: str) -> Optional[Literal["ro", "rw"]]:
-        """Check whether the image is already mapped, read-only or read-write"""
-        try:
-            image_stat = os.stat(self.image_path(image))
-        except FileNotFoundError:
-            return None
-        return "rw" if (image_stat.st_mode & 0o222) != 0 else "ro"
-
     def image_list(self):
         try:
             images = self.rbd("ls")
@@ -154,10 +196,6 @@ class Pool(object):
             image,
         )
 
-    def image_remap_ro(self, image: str):
-        self.image_unmap(image)
-        self.image_map(image, "ro")
-
     def image_unmap(self, image: str):
         if os.path.exists(self.image_path(image)):
             try:
@@ -171,6 +209,88 @@ class Pool(object):
                     raise
 
 
+class FileBackedPool(Pool):
+    """File-backed pool for Winery shards mimicking a Ceph RBD pool.
+
+    Unmapped images are represented by setting the file permission to 0o000.
+    """
+
+    def __init__(
+        self,
+        base_directory: Path,
+        pool_name: str,
+        shard_max_size: int,
+    ) -> None:
+        self.base_directory = base_directory
+        self.pool_name = pool_name
+        self.image_size = shard_max_size
+
+        self.pool_dir = self.base_directory / self.pool_name
+        self.pool_dir.mkdir(exist_ok=True)
+
+    def image_exists(self, image: str) -> bool:
+        return (self.pool_dir / image).is_file()
+
+    def image_list(self) -> List[str]:
+        return [entry.name for entry in self.pool_dir.iterdir() if entry.is_file()]
+
+    def image_path(self, image: str) -> str:
+        return str(self.pool_dir / image)
+
+    def image_create(self, image: str) -> None:
+        path = self.image_path(image)
+        if os.path.exists(path):
+            raise ValueError(f"Image {image} already exists")
+        open(path, "w").close()
+        os.truncate(path, self.image_size * 1024 * 1024)
+        self.image_map(image, "rw")
+
+    def image_map(self, image: str, options: str) -> None:
+        if "ro" in options:
+            os.chmod(self.image_path(image), 0o400)
+        else:
+            os.chmod(self.image_path(image), 0o600)
+
+    def image_unmap(self, image: str) -> None:
+        os.chmod(self.image_path(image), 0o000)
+
+    def image_unmap_all(self) -> None:
+        for entry in self.pool_dir.iterdir():
+            if entry.is_file():
+                entry.chmod(0o000)
+
+
+def pool_from_settings(
+    shards_settings: settings.Shards,
+    shards_pool_settings: settings.ShardsPool,
+) -> Pool:
+    """Return a Pool from the settings"""
+    pool_type = shards_pool_settings["type"]
+    if pool_type == "rbd":
+        rbd_settings = settings.rbd_shards_pool_settings_with_defaults(
+            shards_pool_settings
+        )
+        return RBDPool(
+            shard_max_size=shards_settings["max_size"],
+            rbd_use_sudo=rbd_settings["use_sudo"],
+            rbd_pool_name=rbd_settings["pool_name"],
+            rbd_data_pool_name=rbd_settings["data_pool_name"],
+            rbd_image_features_unsupported=rbd_settings["image_features_unsupported"],
+            rbd_map_options=rbd_settings["map_options"],
+        )
+    elif pool_type == "directory":
+        dir_settings = settings.directory_shards_pool_settings_with_defaults(
+            shards_pool_settings
+        )
+        return FileBackedPool(
+            shard_max_size=shards_settings["max_size"],
+            base_directory=Path(dir_settings["base_directory"]),
+            pool_name=dir_settings["pool_name"],
+        )
+    else:
+        raise ValueError(f"Unknown shards pool type: {pool_type}")
+
+
 def record_shard_mapped(base: SharedBase, shard_name: str):
     """Record a shard as mapped, bailing out after a few attempts.
 
@@ -297,7 +417,7 @@ def manage_images(
                     pool.image_create(shard_name)
                     did_something = True
                 else:
-                    logger.warn(
+                    logger.warning(
                         "Detected %s shard %s and RBD image exists, mapping read-write",
                         shard_state.name,
                         shard_name,
@@ -328,25 +448,6 @@ def manage_images(
             attempt += 1
 
 
-def pool_from_settings(
-    shards_settings: settings.Shards, shards_pool_settings: settings.RbdShardsPool
-) -> Pool:
-    pool_type = shards_pool_settings["type"]
-    if pool_type == "rbd":
-        return Pool(
-            shard_max_size=shards_settings["max_size"],
-            rbd_use_sudo=shards_pool_settings["use_sudo"],
-            rbd_pool_name=shards_pool_settings["pool_name"],
-            rbd_data_pool_name=shards_pool_settings["data_pool_name"],
-            rbd_image_features_unsupported=shards_pool_settings[
-                "image_features_unsupported"
-            ],
-            rbd_map_options=shards_pool_settings["map_options"],
-        )
-    else:
-        raise ValueError(f"Unknown shards pool type: {pool_type}")
-
-
 class ROShard:
     def __init__(self, name, throttler, pool, **kwargs):
         self.pool = pool
diff --git a/swh/objstorage/backends/winery/settings.py b/swh/objstorage/backends/winery/settings.py
index 47737a8..3c9959f 100644
--- a/swh/objstorage/backends/winery/settings.py
+++ b/swh/objstorage/backends/winery/settings.py
@@ -48,13 +48,12 @@ def shards_settings_with_defaults(values: Shards) -> Shards:
 class ShardsPool(TypedDict):
     """Settings for the Shards pool"""
 
-    type: Literal["rbd"]
+    type: Literal["rbd", "directory"]
 
 
-class RbdShardsPool(TypedDict):
+class RbdShardsPool(ShardsPool, TypedDict):
     """Settings for the Ceph RBD-based Shards pool"""
 
-    type: Literal["rbd"]
     use_sudo: NotRequired[bool]
     map_options: NotRequired[str]
     pool_name: NotRequired[str]
@@ -77,6 +76,32 @@ def rbd_shards_pool_settings_with_defaults(
     }
 
 
+class DirectoryShardsPool(ShardsPool, TypedDict):
+    """Settings for the File-based Shards pool"""
+
+    base_directory: str
+    pool_name: NotRequired[str]
+
+
+def directory_shards_pool_settings_with_defaults(
+    values: ShardsPool,
+) -> DirectoryShardsPool:
+    """Hydrate RbdShards settings with default values"""
+    if values["type"] != "directory":
+        raise ValueError(
+            f"Instantiating a directory shards pool with the wrong type: {values['type']}"
+        )
+    if "base_directory" not in values:
+        raise ValueError(
+            "Missing base_directory setting for Directory-based shards pool"
+        )
+    return {
+        "type": "directory",
+        "pool_name": values.get("pool_name", "shards"),  # type: ignore[typeddict-item]
+        "base_directory": values["base_directory"],  # type: ignore[typeddict-item]
+    }
+
+
 class Throttler(TypedDict):
     """Settings for the winery throttler"""
 
@@ -107,7 +132,7 @@ class Winery(TypedDict, total=False):
 
     database: Database
     shards: Shards
-    shards_pool: RbdShardsPool
+    shards_pool: ShardsPool
     throttler: Optional[Throttler]
     packer: Packer
 
@@ -142,6 +167,9 @@ def populate_default_settings(
         if shards_pool["type"] == "rbd":
             shards_pool = rbd_shards_pool_settings_with_defaults(shards_pool)
             settings["shards_pool"] = shards_pool
+        elif shards_pool["type"] == "directory":
+            shards_pool = directory_shards_pool_settings_with_defaults(shards_pool)
+            settings["shards_pool"] = shards_pool
         else:
             raise ValueError(f"Unknown shards pool type: {shards_pool['type']}")
 
diff --git a/swh/objstorage/cli.py b/swh/objstorage/cli.py
index d3396ae..e017b82 100644
--- a/swh/objstorage/cli.py
+++ b/swh/objstorage/cli.py
@@ -158,10 +158,10 @@ def winery_rbd(
     """Run a winery RBD image manager process"""
     import signal
 
-    from swh.objstorage.backends.winery.roshard import Pool, manage_images
+    from swh.objstorage.backends.winery.roshard import manage_images, pool_from_settings
     from swh.objstorage.backends.winery.sleep import sleep_exponential
 
-    legacy_kwargs = ctx.obj["winery_legacy_kwargs"]
+    settings = ctx.obj["winery_settings"]
 
     stop_on_next_iteration = False
 
@@ -190,18 +190,14 @@ def winery_rbd(
     signal.signal(signal.SIGINT, set_signal_received)
     signal.signal(signal.SIGTERM, set_signal_received)
 
-    pool = Pool(
-        shard_max_size=legacy_kwargs["shard_max_size"],
-        rbd_pool_name=legacy_kwargs["rbd_pool_name"],
-        rbd_data_pool_name=legacy_kwargs["rbd_data_pool_name"],
-        rbd_use_sudo=legacy_kwargs["rbd_use_sudo"],
-        rbd_image_features_unsupported=legacy_kwargs["rbd_image_features_unsupported"],
-        rbd_map_options=legacy_kwargs["rbd_map_options"],
+    pool = pool_from_settings(
+        shards_settings=settings["shards"],
+        shards_pool_settings=settings["shards_pool"],
     )
 
     manage_images(
         pool=pool,
-        base_dsn=legacy_kwargs["base_dsn"],
+        base_dsn=settings["database"]["db"],
         manage_rw_images=manage_rw_images,
         wait_for_image=wait_for_image,
         only_prefix=only_prefix,
@@ -281,10 +277,10 @@ def winery_clean_deleted_objects(ctx):
     import signal
 
     from swh.objstorage.backends.winery.objstorage import deleted_objects_cleaner
-    from swh.objstorage.backends.winery.roshard import Pool
+    from swh.objstorage.backends.winery.roshard import pool_from_settings
     from swh.objstorage.backends.winery.sharedbase import SharedBase
 
-    legacy_kwargs = ctx.obj["legacy_kwargs"]
+    settings = ctx.obj["winery_settings"]
 
     stop_on_next_iteration = False
 
@@ -300,14 +296,11 @@ def winery_clean_deleted_objects(ctx):
     signal.signal(signal.SIGINT, set_signal_received)
     signal.signal(signal.SIGTERM, set_signal_received)
 
-    base = SharedBase(base_dsn=legacy_kwargs["base_dsn"])
+    base = SharedBase(base_dsn=settings["database"]["db"])
 
-    pool = Pool(
-        shard_max_size=legacy_kwargs["shard_max_size"],
-        rbd_pool_name=legacy_kwargs["rbd_pool_name"],
-        rbd_data_pool_name=legacy_kwargs["rbd_data_pool_name"],
-        rbd_use_sudo=legacy_kwargs["rbd_use_sudo"],
-        rbd_image_features_unsupported=legacy_kwargs["rbd_image_features_unsupported"],
+    pool = pool_from_settings(
+        shards_settings=settings["shards"],
+        shards_pool_settings=settings["shards_pool"],
     )
 
     deleted_objects_cleaner(base, pool, stop_running)
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index 5af8d70..4dfc5bf 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -27,6 +27,7 @@ from swh.objstorage.backends.winery.objstorage import (
     shard_packer,
     stop_after_shards,
 )
+from swh.objstorage.backends.winery.roshard import FileBackedPool
 import swh.objstorage.backends.winery.settings as settings
 from swh.objstorage.backends.winery.sharedbase import ShardState, SharedBase
 from swh.objstorage.backends.winery.sleep import sleep_exponential
@@ -42,7 +43,7 @@ from swh.objstorage.factory import get_objstorage
 from swh.objstorage.objstorage import objid_for_content
 from swh.objstorage.tests.objstorage_testing import ObjStorageTestFixture
 
-from .winery_testing_helpers import FileBackedPool, PoolHelper
+from .winery_testing_helpers import RBDPoolHelper
 
 logger = logging.getLogger(__name__)
 
@@ -102,7 +103,7 @@ def rbd_map_options():
 
 @pytest.fixture
 def ceph_pool(remove_pool, remove_images, rbd_pool_name, rbd_map_options, needs_ceph):
-    pool = PoolHelper(
+    pool = RBDPoolHelper(
         shard_max_size=10 * 1024 * 1024,
         rbd_pool_name=rbd_pool_name,
         rbd_map_options=rbd_map_options,
@@ -113,6 +114,12 @@ def ceph_pool(remove_pool, remove_images, rbd_pool_name, rbd_map_options, needs_
     else:
         logger.info("Not removing pool")
 
+    pool._settings_for_tests = {
+        "type": "rbd",
+        "pool_name": rbd_pool_name,
+        "map_options": rbd_map_options,
+    }
+
     yield pool
 
     if remove_images or remove_pool:
@@ -128,13 +135,21 @@ def ceph_pool(remove_pool, remove_images, rbd_pool_name, rbd_map_options, needs_
 
 @pytest.fixture
 def file_backed_pool(mocker, tmp_path, shard_max_size, rbd_pool_name):
-    FileBackedPool.set_base_directory(tmp_path)
-    mocker.patch(
-        "swh.objstorage.backends.winery.roshard.Pool",
-        new=FileBackedPool,
+    pool = FileBackedPool(
+        base_directory=tmp_path,
+        shard_max_size=10 * 1024 * 1024,
+        pool_name=rbd_pool_name,
     )
-    pool = FileBackedPool(shard_max_size=10 * 1024 * 1024, rbd_pool_name=rbd_pool_name)
     pool.image_unmap_all()
+    mocker.patch(
+        "swh.objstorage.backends.winery.roshard.RBDPool.from_kwargs",
+        return_value=pool,
+    )
+    pool._settings_for_tests = {
+        "type": "directory",
+        "base_directory": str(tmp_path),
+        "pool_name": rbd_pool_name,
+    }
     yield pool
 
 
@@ -210,8 +225,7 @@ def winery_settings(
     shard_max_size,
     pack_immediately,
     clean_immediately,
-    rbd_pool_name,
-    rbd_map_options,
+    image_pool,
     use_throttler,
 ) -> settings.Winery:
     return dict(
@@ -227,14 +241,11 @@ def winery_settings(
             else None
         ),
         packer={
+            "create_images": True,
             "pack_immediately": pack_immediately,
             "clean_immediately": clean_immediately,
         },
-        shards_pool={
-            "type": "rbd",
-            "pool_name": rbd_pool_name,
-            "map_options": rbd_map_options,
-        },
+        shards_pool=image_pool._settings_for_tests,
     )
 
 
@@ -434,8 +445,7 @@ def test_winery_deleted_objects_cleaner_handles_exception(
     winery_writer, file_backed_pool, mocker
 ):
     from swh.objstorage.backends.winery import objstorage as winery_objstorage
-
-    from ..backends.winery.roshard import ROShard
+    from swh.objstorage.backends.winery.roshard import ROShard
 
     # Add two objects
     shard = winery_writer.base.locked_shard
@@ -475,7 +485,9 @@ def test_winery_deleted_objects_cleaner_handles_exception(
         return None
 
     mocker.patch.object(
-        winery_objstorage.ROShard, "delete", side_effect=roshard_delete_side_effect
+        winery_objstorage.roshard.ROShard,
+        "delete",
+        side_effect=roshard_delete_side_effect,
     )
 
     # Let’s run the cleaner
@@ -527,14 +539,16 @@ def test_winery_pack(winery_settings, winery_writer, image_pool):
     winery_writer.base.shard_packing_starts(shard)
 
     assert pack(
-        shard,
+        shard=shard,
+        base_dsn=winery_settings["database"]["db"],
         packer_settings=winery_settings["packer"],
         throttler_settings=winery_settings["throttler"],
-        **winery_writer.args,
+        shards_settings=winery_settings["shards"],
+        shards_pool_settings=winery_settings["shards_pool"],
     )
     assert winery_writer.base.get_shard_state(shard) == ShardState.PACKED
 
-    assert cleanup_rw_shard(shard, **winery_writer.args)
+    assert cleanup_rw_shard(shard, base_dsn=winery_settings["database"]["db"])
     assert winery_writer.base.get_shard_state(shard) == ShardState.READONLY
 
 
@@ -716,9 +730,9 @@ def test_winery_packer_clean_up_interrupted_shard(
         ret = shard_packer(
             database=winery_settings["database"],
             shards=winery_settings["shards"],
-            shards_pool={**winery_settings["shards_pool"], "create_images": False},
+            shards_pool=winery_settings["shards_pool"],
             throttler=winery_settings["throttler"],
-            packer=winery_settings.get("packer"),
+            packer={**winery_settings.get("packer"), "create_images": False},
             stop_packing=stop_after_shards(1),
         )
 
@@ -1099,9 +1113,11 @@ def test_winery_get_object(winery_settings, winery_writer, winery_reader, image_
     assert (
         pack(
             shard,
+            base_dsn=winery_settings["database"]["db"],
             packer_settings=winery_settings["packer"],
             throttler_settings=winery_settings["throttler"],
-            **winery_writer.args,
+            shards_settings=winery_settings["shards"],
+            shards_pool_settings=winery_settings["shards_pool"],
         )
         is True
     )
@@ -1111,7 +1127,7 @@ def test_winery_get_object(winery_settings, winery_writer, winery_reader, image_
 @pytest.mark.skipif("CEPH_HARDCODE_POOL" in os.environ, reason="Ceph pool hardcoded")
 def test_winery_ceph_pool(needs_ceph, rbd_map_options):
     name = "IMAGE"
-    pool = PoolHelper(
+    pool = RBDPoolHelper(
         shard_max_size=10 * 1024 * 1024,
         rbd_pool_name="test-winery-ceph-pool",
         rbd_map_options=rbd_map_options,
diff --git a/swh/objstorage/tests/winery_testing_helpers.py b/swh/objstorage/tests/winery_testing_helpers.py
index 404e403..e9a7835 100644
--- a/swh/objstorage/tests/winery_testing_helpers.py
+++ b/swh/objstorage/tests/winery_testing_helpers.py
@@ -5,12 +5,10 @@
 
 import atexit
 import logging
-import os
-from pathlib import Path
 from subprocess import CalledProcessError
-from typing import Iterable, List, Optional, Tuple
+from typing import Iterable, Optional, Tuple
 
-from swh.objstorage.backends.winery.roshard import Pool
+from swh.objstorage.backends.winery.roshard import RBDPool
 from swh.objstorage.backends.winery.settings import DEFAULT_IMAGE_FEATURES_UNSUPPORTED
 
 logger = logging.getLogger(__name__)
@@ -29,7 +27,7 @@ DEFAULT_DATA_POOL_SETTINGS = {
 }
 
 
-class PoolHelper(Pool):
+class RBDPoolHelper(RBDPool):
     def __init__(
         self,
         shard_max_size: int,
@@ -65,7 +63,7 @@ class PoolHelper(Pool):
             "pg_num", DEFAULT_DATA_POOL_SETTINGS["pg_num"]
         )
 
-    POOL_CONFIG = Pool.POOL_CONFIG + (
+    POOL_CONFIG = RBDPool.POOL_CONFIG + (
         "rbd_erasure_code_profile",
         "rbd_data_pool_settings",
     )
@@ -148,70 +146,3 @@ class PoolHelper(Pool):
             self.ceph("osd", "pool", "set", self.data_pool_name, setting, value)
 
         self.ceph("osd", "pool", "create", self.pool_name)
-
-
-class FileBackedPool(Pool):
-    """File-backed pool for Winery shards mimicking a Ceph RBD pool.
-
-    Unmapped images are represented by setting the file permission to 0o000.
-    """
-
-    base_directory: Optional[Path] = None
-
-    def __init__(
-        self,
-        *args,
-        **kwargs,
-    ) -> None:
-        super().__init__(*args, **kwargs)
-        assert (
-            FileBackedPool.base_directory is not None
-        ), "set_base_directory() should have been called first"
-        self.pool_dir = FileBackedPool.base_directory / self.pool_name
-        self.pool_dir.mkdir(exist_ok=True)
-
-    @classmethod
-    def set_base_directory(cls, base_directory: Path) -> None:
-        cls.base_directory = base_directory
-
-    @classmethod
-    def from_kwargs(cls, **kwargs) -> "Pool":
-        """Create a Pool from a set of arbitrary keyword arguments"""
-        return cls(**{k: kwargs[k] for k in Pool.POOL_CONFIG if k in kwargs})
-
-    def run(self, *cmd: str) -> Iterable[str]:
-        raise NotImplementedError
-
-    def rbd(self, *arguments: str) -> Iterable[str]:
-        raise NotImplementedError
-
-    def image_exists(self, image: str) -> bool:
-        return (self.pool_dir / image).is_file()
-
-    def image_list(self) -> List[str]:
-        return [entry.name for entry in self.pool_dir.iterdir() if entry.is_file()]
-
-    def image_path(self, image: str) -> str:
-        return str(self.pool_dir / image)
-
-    def image_create(self, image: str) -> None:
-        path = self.image_path(image)
-        if os.path.exists(path):
-            raise ValueError(f"Image {image} already exists")
-        open(path, "w").close()
-        os.truncate(path, self.image_size * 1024 * 1024)
-        self.image_map(image, "rw")
-
-    def image_map(self, image: str, options: str) -> None:
-        if "ro" in options:
-            os.chmod(self.image_path(image), 0o400)
-        else:
-            os.chmod(self.image_path(image), 0o600)
-
-    def image_unmap(self, image: str) -> None:
-        os.chmod(self.image_path(image), 0o000)
-
-    def image_unmap_all(self) -> None:
-        for entry in self.pool_dir.iterdir():
-            if entry.is_file():
-                entry.chmod(0o000)
-- 
GitLab


From 1eafb82322890ac9f92d6cbd889fbf9b100a1bb4 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Tue, 11 Mar 2025 16:16:33 +0100
Subject: [PATCH 19/25] winery: close read-only (and read-write) shards on
 shutdown

---
 swh/objstorage/backends/winery/objstorage.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 32ad08f..3bbded3 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -127,6 +127,7 @@ class WineryObjStorage(ObjStorage):
             yield {"sha256": signature}
 
     def on_shutdown(self):
+        self.reader.on_shutdown()
         if self.writer:
             self.writer.on_shutdown()
 
@@ -185,6 +186,12 @@ class WineryReader:
             raise ObjNotFoundError(obj_id)
         return content
 
+    def on_shutdown(self):
+        for shard in self.ro_shards.values():
+            shard.close()
+        self.ro_shards = {}
+        self.rw_shards = {}
+
 
 def pack(
     shard: str,
-- 
GitLab


From 6337570aa3e56a2cdcec6c098ced749d63a0b21d Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Tue, 11 Mar 2025 16:18:01 +0100
Subject: [PATCH 20/25] winery: On shutdown, ensure that packer processes are
 waited for

This avoids a bunch of race conditions when running the normal
objstorage test suite on top of a ceph pool.
---
 swh/objstorage/backends/winery/objstorage.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 3bbded3..73bbee0 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -370,6 +370,8 @@ class WineryWriter:
 
     def on_shutdown(self):
         self.release_shard()
+        for p in self.packers:
+            p.join()
 
     def __del__(self):
         for p in getattr(self, "packers", []):
-- 
GitLab


From 1f90aae1ca4c436d3ccbc542d3e82a83299715da Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Tue, 11 Mar 2025 16:37:48 +0100
Subject: [PATCH 21/25] winery: in storage fixture, use rbd_pool_name

The fixture is used for logging but wasn't being injected.
---
 swh/objstorage/tests/test_objstorage_winery.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index 4dfc5bf..7a635ca 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -253,6 +253,7 @@ def winery_settings(
 def storage(
     winery_settings,
     postgresql_dsn,
+    rbd_pool_name,
 ):
     storage = get_objstorage(cls="winery", **winery_settings)
     assert isinstance(storage, WineryObjStorage)
-- 
GitLab


From 2a42ea799871aa98384a667fcbbe0b23c5c6e619 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Tue, 11 Mar 2025 16:38:48 +0100
Subject: [PATCH 22/25] winery: run deletion tests on both pool backends

To run these tests on the ceph backend, we actually need to release
shards so they can be unmapped (the file-based backend can just do
whatever to a file that is being held open, but a block device needs
more care). The newly run test also unearthed a small file descriptor
leak in the swh.perfecthash object deletion paths.
---
 .../tests/test_objstorage_winery.py           | 33 +++++++++++--------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
index 7a635ca..9019a71 100644
--- a/swh/objstorage/tests/test_objstorage_winery.py
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -409,7 +409,7 @@ def test_winery_delete_on_rwshard(winery_writer, winery_reader):
 
 @pytest.mark.shard_max_size(1)
 @pytest.mark.pack_immediately(True)
-def test_winery_delete_on_roshard(winery_writer, winery_reader, file_backed_pool):
+def test_winery_delete_on_roshard(winery_writer, winery_reader, image_pool):
     shard = winery_writer.base.locked_shard
     content = b"SOMETHING"
     sha256 = objid_for_content(content)["sha256"]
@@ -419,22 +419,28 @@ def test_winery_delete_on_roshard(winery_writer, winery_reader, file_backed_pool
     for packer in winery_writer.packers:
         packer.join()
     assert winery_reader.get(sha256) == content
+
     # This will only mark as deleted in SharedBase
     winery_writer.delete(sha256)
     assert len(list(winery_writer.base.deleted_objects())) == 1
     # We still should not be able to access it
     with pytest.raises(ObjNotFoundError):
         winery_reader.get(sha256)
+
+    # Make sure all images are released
+    winery_reader.on_shutdown()
+
     # The content is still present in the roshard image at this point
-    image_path = file_backed_pool.image_path(shard)
+    image_path = image_pool.image_path(shard)
     with open(image_path, "rb") as image:
         assert b"SOMETHING" in image.read()
+
     # Perform cleanup
-    file_backed_pool.image_unmap(shard)
-    file_backed_pool.image_map(shard, "rw")
-    deleted_objects_cleaner(
-        winery_reader.base, file_backed_pool, stop_running=lambda: False
-    )
+    image_pool.image_unmap(shard)
+    image_pool.image_map(shard, "rw")
+
+    deleted_objects_cleaner(winery_reader.base, image_pool, stop_running=lambda: False)
+
     assert len(list(winery_reader.base.deleted_objects())) == 0
     with open(image_path, "rb") as image:
         assert b"SOMETHING" not in image.read()
@@ -443,7 +449,7 @@ def test_winery_delete_on_roshard(winery_writer, winery_reader, file_backed_pool
 @pytest.mark.shard_max_size(20)
 @pytest.mark.pack_immediately(True)
 def test_winery_deleted_objects_cleaner_handles_exception(
-    winery_writer, file_backed_pool, mocker
+    winery_writer, image_pool, mocker
 ):
     from swh.objstorage.backends.winery import objstorage as winery_objstorage
     from swh.objstorage.backends.winery.roshard import ROShard
@@ -462,7 +468,7 @@ def test_winery_deleted_objects_cleaner_handles_exception(
         packer.join()
 
     # We should only have one roshard
-    assert len(file_backed_pool.image_list()) == 1
+    assert len(image_pool.image_list()) == 1
 
     # This will only mark as deleted in SharedBase for the time being
     winery_writer.delete(sha256_1)
@@ -470,7 +476,7 @@ def test_winery_deleted_objects_cleaner_handles_exception(
     assert len(list(winery_writer.base.deleted_objects())) == 2
 
     # The content is still present in the roshard image at this point
-    image_path = file_backed_pool.image_path(shard)
+    image_path = image_pool.image_path(shard)
 
     # Setup so we get an exception on the second object
     already_called = False
@@ -492,11 +498,12 @@ def test_winery_deleted_objects_cleaner_handles_exception(
     )
 
     # Let’s run the cleaner
-    file_backed_pool.image_unmap(shard)
-    file_backed_pool.image_map(shard, "rw")
+    image_pool.image_unmap(shard)
+    image_pool.image_map(shard, "rw")
+
     with pytest.raises(OSError):
         winery_objstorage.deleted_objects_cleaner(
-            winery_writer.base, file_backed_pool, stop_running=lambda: False
+            winery_writer.base, image_pool, stop_running=lambda: False
         )
 
     # We should only have one remaining object to delete
-- 
GitLab


From a714ab06b4f41910744f082e500270e189bfd85a Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Fri, 14 Mar 2025 13:17:06 +0100
Subject: [PATCH 23/25] winery settings: remove legacy kwargs

---
 swh/objstorage/backends/winery/objstorage.py | 37 +++++++++++---------
 swh/objstorage/backends/winery/roshard.py    |  2 +-
 swh/objstorage/backends/winery/settings.py   | 10 ++----
 swh/objstorage/backends/winery/sharedbase.py |  5 ++-
 swh/objstorage/cli.py                        |  4 +--
 5 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
index 73bbee0..92b7c36 100644
--- a/swh/objstorage/backends/winery/objstorage.py
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -39,7 +39,7 @@ class WineryObjStorage(ObjStorage):
     ) -> None:
         super().__init__(allow_delete=allow_delete, name=name)
 
-        self.settings, legacy_kwargs = settings.populate_default_settings(
+        self.settings = settings.populate_default_settings(
             database=database,
             shards=shards,
             shards_pool=shards_pool,
@@ -53,7 +53,7 @@ class WineryObjStorage(ObjStorage):
             shards_pool_settings=self.settings["shards_pool"],
         )
         self.reader = WineryReader(
-            throttler=self.throttler, pool=self.pool, **legacy_kwargs
+            throttler=self.throttler, pool=self.pool, database=self.settings["database"]
         )
 
         if readonly:
@@ -64,7 +64,7 @@ class WineryObjStorage(ObjStorage):
                 throttler_settings=self.settings.get("throttler"),
                 shards_settings=self.settings["shards"],
                 shards_pool_settings=self.settings["shards_pool"],
-                **legacy_kwargs,
+                database_settings=self.settings["database"],
             )
 
     @timed
@@ -133,12 +133,14 @@ class WineryObjStorage(ObjStorage):
 
 
 class WineryReader:
-    def __init__(self, throttler: Throttler, pool: roshard.Pool, **kwargs):
-        self.args = kwargs
+    def __init__(
+        self, throttler: Throttler, pool: roshard.Pool, database: settings.Database
+    ):
         self.throttler = throttler
         self.pool = pool
-        self.base_dsn = self.args["base_dsn"]
-        self.base = SharedBase(**self.args)
+        self.base = SharedBase(
+            base_dsn=database["db"], application_name=database["application_name"]
+        )
         self.ro_shards: Dict[str, roshard.ROShard] = {}
         self.rw_shards: Dict[str, RWShard] = {}
 
@@ -154,7 +156,9 @@ class WineryReader:
         if name not in self.ro_shards:
             try:
                 shard = roshard.ROShard(
-                    name=name, throttler=self.throttler, pool=self.pool, **self.args
+                    name=name,
+                    throttler=self.throttler,
+                    pool=self.pool,
                 )
             except roshard.ShardNotMapped:
                 return None
@@ -165,7 +169,7 @@ class WineryReader:
 
     def rwshard(self, name) -> RWShard:
         if name not in self.rw_shards:
-            shard = RWShard(name, shard_max_size=0, base_dsn=self.base_dsn)
+            shard = RWShard(name, shard_max_size=0, base_dsn=self.base.dsn)
             self.rw_shards[name] = shard
         return self.rw_shards[name]
 
@@ -256,19 +260,20 @@ class WineryWriter:
         throttler_settings: Optional[settings.Throttler],
         shards_settings: settings.Shards,
         shards_pool_settings: settings.ShardsPool,
-        rwshard_idle_timeout: float = 300,
-        **kwargs,
+        database_settings: settings.Database,
     ):
         self.packer_settings = packer_settings
         self.throttler_settings = throttler_settings
         self.shards_settings = shards_settings
         self.shards_pool_settings = shards_pool_settings
-        self.args = kwargs
-        self.base = SharedBase(**self.args)
+        self.base = SharedBase(
+            base_dsn=database_settings["db"],
+            application_name=database_settings["application_name"],
+        )
         self.shards_filled: List[str] = []
         self.packers: List[Process] = []
         self._shard: Optional[RWShard] = None
-        self.idle_timeout = rwshard_idle_timeout
+        self.idle_timeout = shards_settings.get("rw_idle_timeout", 300)
 
     def release_shard(
         self,
@@ -297,7 +302,7 @@ class WineryWriter:
         if not self._shard:
             self._shard = RWShard(
                 name=self.base.locked_shard,
-                base_dsn=self.args["base_dsn"],
+                base_dsn=self.base.dsn,
                 shard_max_size=self.shards_settings["max_size"],
                 idle_timeout_cb=partial(self.release_shard, from_idle_handler=True),
                 idle_timeout=self.idle_timeout,
@@ -421,7 +426,7 @@ def shard_packer(
       wait_for_shard: sleep function called when no shards are available to be packed
     """
 
-    all_settings, legacy_kwargs = settings.populate_default_settings(
+    all_settings = settings.populate_default_settings(
         database=database,
         shards=shards,
         shards_pool=shards_pool,
diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
index 28e53ac..116de1c 100644
--- a/swh/objstorage/backends/winery/roshard.py
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -449,7 +449,7 @@ def manage_images(
 
 
 class ROShard:
-    def __init__(self, name, throttler, pool, **kwargs):
+    def __init__(self, name, throttler, pool):
         self.pool = pool
         image_status = self.pool.image_mapped(name)
 
diff --git a/swh/objstorage/backends/winery/settings.py b/swh/objstorage/backends/winery/settings.py
index 3c9959f..eb0ccf1 100644
--- a/swh/objstorage/backends/winery/settings.py
+++ b/swh/objstorage/backends/winery/settings.py
@@ -3,7 +3,7 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from typing import Any, Dict, Literal, NotRequired, Optional, Tuple, TypedDict
+from typing import Literal, NotRequired, Optional, Tuple, TypedDict
 
 # This would be used for image features that are not supported by the kernel RBD
 # driver, e.g. exclusive-lock, object-map and fast-diff for kernels < 5.3
@@ -146,22 +146,18 @@ def populate_default_settings(
     shards_pool: Optional[ShardsPool] = None,
     throttler: Optional[Throttler] = None,
     packer: Optional[Packer] = None,
-) -> Tuple[Winery, Dict[str, Any]]:
+) -> Winery:
     """Given some settings for a Winery objstorage, add all the appropriate
     default settings."""
     settings: Winery = {}
-    legacy_kwargs: Dict[str, Any] = {}
 
     if database is not None:
         database = database_settings_with_defaults(database)
         settings["database"] = database
-        legacy_kwargs["base_dsn"] = database["db"]
-        legacy_kwargs["application_name"] = database["application_name"]
 
     if shards is not None:
         shards = shards_settings_with_defaults(shards)
         settings["shards"] = shards
-        legacy_kwargs["rwshard_idle_timeout"] = shards["rw_idle_timeout"]
 
     if shards_pool is not None:
         if shards_pool["type"] == "rbd":
@@ -183,4 +179,4 @@ def populate_default_settings(
         packer = packer_settings_with_defaults(packer)
         settings["packer"] = packer
 
-    return settings, legacy_kwargs
+    return settings
diff --git a/swh/objstorage/backends/winery/sharedbase.py b/swh/objstorage/backends/winery/sharedbase.py
index af6f2b6..7789082 100644
--- a/swh/objstorage/backends/winery/sharedbase.py
+++ b/swh/objstorage/backends/winery/sharedbase.py
@@ -107,8 +107,11 @@ class SharedBase(Database):
     current_version: int = 2
 
     def __init__(
-        self, base_dsn: str, application_name: str = "SWH Winery SharedBase", **kwargs
+        self, base_dsn: str, application_name: Optional[str] = None, **kwargs
     ) -> None:
+        if application_name is None:
+            application_name = "SWH Winery SharedBase"
+
         super().__init__(
             dsn=base_dsn,
             application_name=application_name,
diff --git a/swh/objstorage/cli.py b/swh/objstorage/cli.py
index e017b82..517c565 100644
--- a/swh/objstorage/cli.py
+++ b/swh/objstorage/cli.py
@@ -107,8 +107,8 @@ def winery(ctx):
         populate_default_settings,
     )
 
-    ctx.obj["winery_settings"], ctx.obj["winery_legacy_kwargs"] = (
-        populate_default_settings(**{k: v for k, v in config.items() if k in SETTINGS})
+    ctx.obj["winery_settings"] = populate_default_settings(
+        **{k: v for k, v in config.items() if k in SETTINGS}
     )
 
 
-- 
GitLab


From 67a32cc278e97f3075e5065860c26836a8b9e1c3 Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Tue, 11 Mar 2025 18:36:46 +0100
Subject: [PATCH 24/25] winery: bring documentation up to date with respect to
 the reality of the code

---
 docs/winery.rst                              | 148 +++++++++++++++++--
 swh/objstorage/backends/winery/sharedbase.py |  14 ++
 2 files changed, 148 insertions(+), 14 deletions(-)

diff --git a/docs/winery.rst b/docs/winery.rst
index b121836..660a9e2 100644
--- a/docs/winery.rst
+++ b/docs/winery.rst
@@ -17,28 +17,148 @@ If the current accumulated bandwidth is above the maximum desired speed for N ac
 Implementation notes
 --------------------
 
-The `sharedstorage.py` file contains the global index implementation that associates every object id to the shard it contains. A list of shard (either writable or readonly) is stored in a table, with a numeric id to save space. The name of the shard is used to create a database (for write shards) or a RBD image (for read shards).
+:py:mod:`swh.objstorage.backends.winery.sharedbase` contains the global objstorage index implementation, which associates every object id (currently, the SHA256 of the content) with the shard that contains it. The list of shards is stored in a table that associates each shard with a numeric id (to save space) and its current :py:class:`swh.objstorage.backends.winery.sharedbase.ShardState`. The name of the shard is used to create a table (for write shards) or an RBD image (for read shards).
 
-The `roshard.py` file contain the lookup function for a read shard and is a thin layer on top of swh-perfect hash.
+:py:mod:`swh.objstorage.backends.winery.roshard` covers read-only shard management: the classes handling the lifecycle of the shards pool, the :py:class:`swh.objstorage.backends.winery.roshard.ROShardCreator`, and :py:class:`swh.objstorage.backends.winery.roshard.ROShard`, a thin layer on top of :py:mod:`swh.perfecthash` used to access the objects stored inside a read-only shard.
 
-The `rwshard.py` file contains the logic to read, write and enumerate the objects of a write shard using SQL statements on the database dedicated to it.
+:py:mod:`swh.objstorage.backends.winery.rwshard` handles the database-backed write shards throughout their lifecycle.
 
-The `obstorage.py` file contains the backend implementation in the `WineryObjStorage` class. It is a thin layer that delegates writes to a `WineryWriter` instance and reads to a `WineryReader` instance. Although they are currently tightly coupled, they are likely to eventually run in different workers if performance and security requires it.
+:py:class:`swh.objstorage.backends.winery.objstorage.WineryObjStorage` is the main entry point compatible with the :py:mod:`swh.objstorage` interface. It is a thin layer backed by a :py:class:`swh.objstorage.backends.winery.objstorage.WineryWriter` for writes, and a :py:class:`swh.objstorage.backends.winery.objstorage.WineryReader` for read-only accesses.
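+
+A minimal sketch of instantiating the objstorage programmatically; the DSN, size and
+path values below are illustrative, and the full settings schema is described under
+Configuration below::
+
+  from swh.objstorage.factory import get_objstorage
+
+  storage = get_objstorage(
+      cls="winery",
+      database={"db": "postgresql:///winery"},
+      shards={"max_size": 100_000_000_000},
+      shards_pool={"type": "directory", "base_directory": "/srv/winery/pool"},
+      throttler=None,
+      packer={"pack_immediately": True},
+  )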
 
-A `WineryReader` must be able to read an object from both Read Shards and Write Shards. It will first determine the kind of shard the object belongs to by looking it up in the global index. If it is a Read Shard, it will lookup the object using the `ROShard` class from `roshard.py`, ultimately using a Ceph RBD image.  If it is a Write Shard, it will lookup the object using the `RWShard` class from `rwshard.py`, ultimately using a PostgreSQL database.
+:py:class:`swh.objstorage.backends.winery.objstorage.WineryReader` performs read-only actions on both read-only shards and write shards. It will first determine the kind of shard the object belongs to by looking it up in the global index. If it is a read-only shard, it will look up the object using :py:class:`swh.objstorage.backends.winery.roshard.ROShard`, backed by the RBD or directory-based shards pool. If it is a write shard, it will look up the object using the :py:class:`swh.objstorage.backends.winery.rwshard.RWShard`, ultimately using a PostgreSQL table.
 
-All `WineryWriter` operations are idempotent so they can be resumed in case they fail. When a `WineryWriter` is instantiated, it will either:
+All :py:class:`swh.objstorage.backends.winery.objstorage.WineryWriter` operations are idempotent so they can be resumed in case they fail. When a :py:class:`swh.objstorage.backends.winery.objstorage.WineryWriter` is instantiated, it will either:
 
-* Find a Write Shard (i.e. a database) that is not locked by another instance by looking up the list of shards or,
-* Create a new Write Shard by creating a new database
+* Find a write shard (i.e. a table) that is not locked by another instance by looking up the list of shards or,
+* Create a new write shard by creating a new table
 
-and it will lock the Write Shard and own it so no other instance tries to write to it. A PostgreSQL session lock is used to lock the shard so that it is released when the `WineryWrite` process dies unexpectedly and another process can pick it up.
+and it will lock the write shard and own it so no other instance tries to write to it. Locking is done transactionally by setting a locker id in the shards index; when the :py:class:`swh.objstorage.backends.winery.objstorage.WineryWriter` process dies unexpectedly, these entries need to be cleaned up manually.
 
-When a new object is added to the Write Shard, a new row is added to the global index to record that it is owned by this Write Shard and is in flight. Such an object cannot be read because it is not yet complete. If a request to write the same object is sent to another `WineryWriter` instance, it will fail to add it to the global index because it already exists. Since the object is in flight, the `WineryWriter` will check if the shard associated to the object is:
+Writing a new object records its identifier in the index table and its contents in the shard table, within the same transaction.
 
-* its name, which means it owns the object and must resume writing the object
-* not its name, which means another `WineryWriter` owns it and nothing needs to be done
+When the cumulative size of all objects within a write shard exceeds a threshold, it is moved to the `full` state. All objects it contains can still be read from it by any :py:class:`swh.objstorage.backends.winery.objstorage.WineryReader` but no new object will be added to it. When `pack_immediately` is set, a process is spawned and tasked to transform the `full` shard into a read-only shard using the :py:class:`swh.objstorage.backends.winery.objstorage.Packer` class. Should the packing process fail for any reason, a cron job will restart it when it finds write shards that are both in the `packing` state and not locked by any process. Packing is done by enumerating all the records from the write shard table and writing them into a read-only shard of the same name. Incomplete read-only shards will never be used by :py:class:`swh.objstorage.backends.winery.objstorage.WineryReader` because the global index will direct it to use the write shard instead. Once the packing completes, the state of the shard is changed to `packed`, and from that point on the :py:class:`swh.objstorage.backends.winery.objstorage.WineryReader` will only use the read-only shard to find the objects it contains. If `clean_immediately` is set, the table containing the write shard is then destroyed because it is no longer useful, and the process terminates on success.
 
-After the content of the object is successfully added to the Write Shard, the state of the record in the global index is modified to no longer be in flight. The client is notified that the operation was successful and the object can be read from the Write Shard from that point on.
 
-When the size of the database associated with a Write Shard exceeds a threshold, it is set to be in the `packing` state. All objects it contains can be read from it by any `WineryReader` but no new object will be added to it. A process is spawned and is tasked to transform it into a Read Shard using the `Packer` class. Should it fail for any reason, a cron job will restart it when it finds Write Shards that are both in the `packing` state and not locked by any process. Packing is done by enumerating all the records from the Write Shard database and writing them into a Read Shard by the same name. Incomplete Read Shards will never be used by `WineryReader` because the global index will direct it to use the Write Shard instead. Once the packing completes, the state of the shard is modified to be readonly and from that point on the `WineryReader` will only use the Read Shard to find the objects it contains. The database containing the Write Shard is then destroyed because it is no longer useful and the process terminates on success.
+Distributed mode
+----------------
+
+In distributed mode, `Winery` is deployed as a few separate components that synchronize with each other through the shared database (example invocations are sketched after this list):
+
+* read-only instances provide read-only access to both read-only shards and shards that are currently being written to
+
+* writer instances each hold one of the write tables locked, and write objects to it
+
+* the shard packer `swh objstorage winery packer` handles the packing process asynchronously (outside of the `WineryWriter` process):
+
+  * when a shard becomes `full`, it gets locked by the packer, and moved to the `packing` state
+
+  * the shard file is created (when `create_images` is set) or waited for (if the management is delegated to the shard manager)
+
+  * when the shard file is available, the shard gets packed
+
+  * once the packing is done, the shard is moved to the `packed` state
+
+  * if `clean_immediately` is set, the write shard is immediately removed and the shard moved to the `readonly` state
+
+* the RBD shard manager `swh objstorage winery rbd` handles the management of RBD images:
+
+  * all known `readonly` shards are mapped immediately
+
+  * (if `manage_rw_images` is set) when a `standby` or `writing` shard appears, a new RBD image is provisioned in the Ceph cluster, and mapped read-write
+
+  * when a shard packing completes (and a shard status becomes one of `packed`, `cleaning` or `readonly`), the image is mapped (or remapped) read-only.
+
+  * every time a shard is mapped read-only on a given host, that fact is recorded in a database column
+
+* the RW shard cleaner `swh objstorage winery rw-shard-cleaner` performs cleanup of `packed` read-write shards as soon as they are recorded as mapped on enough (`--min-mapped-hosts`) hosts. Each shard gets locked in the `cleaning` state, the database cleanup is performed, then the shard is moved to the final `readonly` state.
+
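+In a typical deployment, each of these components runs as a separate long-lived
+service. A sketch of the corresponding invocations (the option value is
+illustrative, and the usual `swh` configuration applies)::
+
+  swh objstorage winery packer
+  swh objstorage winery rbd
+  swh objstorage winery rw-shard-cleaner --min-mapped-hosts 2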
+
+Configuration
+-------------
+
+`Winery` uses a structured configuration schema::
+
+  objstorage:
+    cls: winery
+
+    # boolean (false (default): allow writes, true: only allow reads)
+    readonly: false
+
+    # Shards-related settings
+    shards:
+      # integer: threshold in bytes above which shards get packed. Can be
+      # exceeded by up to the maximum allowed object size.
+      max_size: 100_000_000_000
+
+      # float: timeout in seconds after which idle read-write shards get
+      # released by the winery writer process
+      rw_idle_timeout: 300
+
+    # Shared database settings
+    database:
+      # string: PostgreSQL connection string for the object index and read-write
+      # shards
+      db: winery
+
+      # string: PostgreSQL application name for connections (unset by default)
+      application_name: null
+
+    # Shards pool settings
+    shards_pool:
+      ## Settings for the RBD shards pool
+      type: rbd
+
+      # Ceph pool name for RBD metadata (default: shards)
+      pool_name: shards
+
+      # Ceph pool name for RBD data (default: constructed as
+      # `{pool_name}-data`). This is the pool where erasure-coding should be set,
+      # if required.
+      data_pool_name: null
+
+      # Use sudo to perform image management (default: true. Can be set to false
+      # if packer.create_images is false and the rbd image manager is deployed
+      # as root)
+      use_sudo: true
+
+      # Options passed to `rbd image map` (default: empty string)
+      map_options: ""
+
+      # Image features unsupported by the RBD kernel module. E.g.
+      # exclusive-lock, object-map and fast-diff, for Linux kernels older than 5.3
+      image_features_unsupported: []
+
+      ## Alternative settings for the directory shards pool (replace the rbd
+      ## settings above with these to use it)
+      # Shards are stored in `{base_directory}/{pool_name}`
+      # type: directory
+      # base_directory: /srv/winery/pool
+      # pool_name: shards
+
+    # Optional throttler configuration, leave unset to disable throttling
+    throttler:
+      # string: PostgreSQL connection string for the throttler database. Can be
+      # shared with (and defaults to) the main database set in the `database`
+      # section. Must be read-write even for readonly instances.
+      db: winery
+
+      # integer: max read bytes per second
+      max_read_bps: 100_000_000
+
+      # integer: max write bytes per second
+      max_write_bps: 100_000_000
+
+    # Packer-related settings
+    packer:
+      # Whether the winery writer should start packing shards immediately, or
+      # defer to the standalone packer (default: true, the writer launches a
+      # background packer process)
+      pack_immediately: true
+
+      # Whether the packer should create shards in the shard pool, or defer to
+      # the pool manager (default: true, the packer creates images)
+      create_images: true
+
+      # Whether the packer should clean read-write shards from the database
+      # immediately, or defer to the rw shard cleaner (default: true, the packer
+      # cleans read-write shards immediately)
+      clean_immediately: true
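+
+With such a configuration, the object storage can also be instantiated
+programmatically through the regular objstorage factory. A minimal sketch,
+assuming the keys above are passed verbatim to the winery backend (values are
+illustrative)::
+
+  from swh.objstorage.factory import get_objstorage
+
+  objstorage = get_objstorage(
+      cls="winery",
+      database={"db": "winery"},
+      shards={"max_size": 100_000_000_000},
+      shards_pool={"type": "directory",
+                   "base_directory": "/srv/winery/pool",
+                   "pool_name": "shards"},
+  )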
diff --git a/swh/objstorage/backends/winery/sharedbase.py b/swh/objstorage/backends/winery/sharedbase.py
index 7789082..a158372 100644
--- a/swh/objstorage/backends/winery/sharedbase.py
+++ b/swh/objstorage/backends/winery/sharedbase.py
@@ -21,24 +21,38 @@ logger = logging.getLogger(__name__)
 
 
 class ShardState(Enum):
+    """Description of the lifecycle of Winery shards"""
+
     STANDBY = "standby"
+    """The write shard is idle but ready to receive new objects as soon as it is locked."""
     WRITING = "writing"
+    """The write shard is currently locked by a WineryWriter and receiving writes."""
     FULL = "full"
+    """The write shard has reached the size threshold and will not be written to anymore,
+    it is ready to be packed."""
     PACKING = "packing"
+    """The write shard is being packed into its read-only version."""
     PACKED = "packed"
+    """The read-only shard has been finalized, the write shard is pending cleanup as soon as
+    all hosts have acknowledged the read-only shard."""
     CLEANING = "cleaning"
+    """The write shard has been locked for cleanup."""
     READONLY = "readonly"
+    """Only the read-only shard remains."""
 
     @property
     def locked(self):
+        """The state corresponds to a locked shard"""
         return self not in {self.STANDBY, self.FULL, self.PACKED, self.READONLY}
 
     @property
     def image_available(self):
+        """In this state, the read-only shard is available"""
         return self in {self.PACKED, self.CLEANING, self.READONLY}
 
     @property
     def readonly(self):
+        """In this state, the write shard is unavailable"""
         return self in {self.CLEANING, self.READONLY}
 
 
-- 
GitLab


From 198abe69f593da6bb789e85e595a14a2d4a566be Mon Sep 17 00:00:00 2001
From: Nicolas Dandrimont <nicolas@dandrimont.eu>
Date: Fri, 14 Mar 2025 14:18:29 +0100
Subject: [PATCH 25/25] winery: improve typing and documentation of SharedBase

---
 swh/objstorage/backends/winery/sharedbase.py | 110 ++++++++++++++++++-
 1 file changed, 105 insertions(+), 5 deletions(-)

diff --git a/swh/objstorage/backends/winery/sharedbase.py b/swh/objstorage/backends/winery/sharedbase.py
index a158372..eb6c5c4 100644
--- a/swh/objstorage/backends/winery/sharedbase.py
+++ b/swh/objstorage/backends/winery/sharedbase.py
@@ -118,6 +118,19 @@ class TemporaryShardLocker:
 
 
 class SharedBase(Database):
+    """The main database for a Winery instance.
+
+    This handles access to the following tables:
+
+    * ``shards`` is the list of shards and their associated :py:class:`ShardState`.
+    * ``signature2shard`` is the mapping between object ids and the shard that
+      contains the associated object.
+
+    This class is also used to lock a shard for exclusive use (by moving it to a
+    locked state, and setting a locker id).
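+
+    A minimal usage sketch (connection settings are elided here; see
+    ``__init__`` for the actual arguments)::
+
+        base = SharedBase(...)            # connects to the shared database
+        name = base.locked_shard          # locks (or creates) a write shard
+        state = base.get_shard_state(name)
+        for shard_name, shard_state in base.list_shards():
+            ...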
+
+    """
+
     current_version: int = 2
 
     def __init__(
@@ -136,6 +149,7 @@ class SharedBase(Database):
 
     @property
     def locked_shard(self) -> str:
+        """The name of the shard that is currently locked for writing by this SharedBase."""
         self.set_locked_shard()
 
         assert self._locked_shard, "failed to lock a shard"
@@ -143,12 +157,16 @@ class SharedBase(Database):
 
     @property
     def locked_shard_id(self) -> int:
+        """The numeric ID of the shard that is currently locked for writing by this
+        :py:class:`SharedBase`."""
         self.set_locked_shard()
 
         assert self._locked_shard, "failed to lock a shard"
         return self._locked_shard[1]
 
     def set_locked_shard(self) -> None:
+        """Lock a shard in :py:const:`ShardState.STANDBY` for writing, creating a new
+        write shard (and the associated table) if none is currently available."""
         if self._locked_shard is not None:
             return
 
@@ -276,6 +294,18 @@ class SharedBase(Database):
         name: Optional[str] = None,
         db: Optional[psycopg.Connection] = None,
     ):
+        """Set the state of a given shard (or of the shard that is currently locked).
+
+        Arguments:
+          new_state: the new :py:class:`ShardState` for the shard.
+          set_locker: whether the shard should be marked as locked by the current
+            :py:class:`SharedBase`.
+          check_locker: whether state change should only be accepted if the shard
+            is currently locked by us.
+          name: the name of the shard to change the state of (defaults to the currently
+            locked shard).
+          db: pass an existing psycopg connection to run this in an existing transaction.
+        """
         if not name:
             if not self._locked_shard:
                 raise ValueError("Can't set shard state, no shard specified or locked")
@@ -325,6 +355,19 @@ class SharedBase(Database):
             self._locked_shard = None
 
     def create_shard(self, new_state: ShardState) -> Tuple[str, int]:
+        """Create a new write shard (locked by the current `SharedBase`), with a
+        generated name.
+
+        Arguments:
+          new_state: the :py:class:`ShardState` for the new shard.
+
+        Returns:
+          the name and numeric id of the newly created shard.
+
+        Raises:
+          RuntimeError: if the shard creation failed (for instance if a shard
+            with an identical name was created concurrently).
+        """
         name = uuid.uuid4().hex
         #
         # ensure the first character is not a number so it can be used as a
@@ -353,6 +396,7 @@ class SharedBase(Database):
             return res
 
     def shard_packing_starts(self, name: str):
+        """Record the named shard as being packed now."""
         with self.pool.connection() as db, db.transaction():
             with db.cursor() as c:
                 c.execute(
@@ -380,7 +424,8 @@ class SharedBase(Database):
                     db=db,
                 )
 
-    def shard_packing_ends(self, name):
+    def shard_packing_ends(self, name: str):
+        """Record the completion of packing shard ``name``."""
         with self.pool.connection() as db, db.transaction():
             with db.cursor() as c:
                 c.execute(
@@ -407,6 +452,11 @@ class SharedBase(Database):
                 )
 
     def get_shard_info(self, id: int) -> Optional[Tuple[str, ShardState]]:
+        """Get the name and :py:class:`ShardState` of the shard with the given ``id``.
+
+        Returns:
+          :py:const:`None` if the shard with the given ``id`` doesn't exist.
+        """
         with self.pool.connection() as db, db.cursor() as c:
             c.execute("SELECT name, state FROM shards WHERE id = %s", (id,))
             row = c.fetchone()
@@ -415,6 +465,11 @@ class SharedBase(Database):
             return (row[0], ShardState(row[1]))
 
     def get_shard_state(self, name: str) -> Optional[ShardState]:
+        """Get the :py:class:`ShardState` of the named shard.
+
+        Returns:
+          :py:const:`None` if the shard with the given ``name`` doesn't exist.
+        """
         with self.pool.connection() as db, db.cursor() as c:
             c.execute("SELECT state FROM shards WHERE name = %s", (name,))
             row = c.fetchone()
@@ -423,12 +478,25 @@ class SharedBase(Database):
             return ShardState(row[0])
 
     def list_shards(self) -> Iterator[Tuple[str, ShardState]]:
+        """List all known shards and their current :py:class:`ShardState`."""
         with self.pool.connection() as db, db.cursor() as c:
             c.execute("SELECT name, state FROM shards")
             for row in c:
                 yield row[0], ShardState(row[1])
 
     def count_objects(self, name: Optional[str] = None) -> Optional[int]:
+        """Count the known objects in a shard.
+
+        Arguments:
+          name: the name of the shard in which objects should be counted
+            (defaults to the currently locked shard)
+
+        Returns:
+          :py:const:`None` if no shard exists with the given ``name``.
+
+        Raises:
+          ValueError: if no shard has been specified and no shard is currently locked.
+        """
         if not name:
             if not self._locked_shard:
                 raise ValueError("Can't count objects, no shard specified or locked")
@@ -450,6 +518,11 @@ class SharedBase(Database):
             return row[1]
 
     def record_shard_mapped(self, host: str, name: str) -> Set[str]:
+        """Record that the ``name``d shard has been mapped on the given ``host``.
+
+        This is used in the distributed winery mode to acknowledge shards that
+        have been seen by hosts, before the write shard is removed for cleanup.
+        """
         with self.pool.connection() as db, db.transaction():
             with db.cursor() as c:
                 c.execute(
@@ -473,7 +546,9 @@ class SharedBase(Database):
                     )
                 return hosts
 
-    def contains(self, obj_id) -> Optional[int]:
+    def contains(self, obj_id: bytes) -> Optional[int]:
+        """Return the id of the shard which contains ``obj_id``, or :py:const`None`
+        if the object is not known (or deleted)."""
         with self.pool.connection() as db, db.cursor() as c:
             c.execute(
                 "SELECT shard FROM signature2shard WHERE "
@@ -486,12 +561,25 @@ class SharedBase(Database):
             return row[0]
 
     def get(self, obj_id) -> Optional[Tuple[str, ShardState]]:
+        """Return the name and :py:class:`ShardState` of the shard containing ``obj_id``,
+        or :py:const:`None` if the object is not known (or deleted)."""
         id = self.contains(obj_id)
         if id is None:
             return None
         return self.get_shard_info(id)
 
-    def record_new_obj_id(self, db, obj_id) -> Optional[int]:
+    def record_new_obj_id(self, db: psycopg.Connection, obj_id: bytes) -> Optional[int]:
+        """Try to record ``obj_id`` as present in the currently locked shard.
+
+        Arguments:
+          db: a psycopg database with an open transaction
+          obj_id: the id of the object being added
+
+        Returns:
+          The numeric id of the shard in which the object is recorded as present
+          (which can differ from the currently locked shard, if the object was
+          added in another concurrent transaction).
+        """
         db.execute(
             "INSERT INTO signature2shard (signature, shard, state) "
             "VALUES (%s, %s, 'present') ON CONFLICT (signature) DO NOTHING",
@@ -500,11 +588,15 @@ class SharedBase(Database):
         cur = db.execute(
             "SELECT shard FROM signature2shard WHERE signature = %s", (obj_id,)
         )
-        return cur.fetchone()[0]
+        res = cur.fetchone()
+        if not res:
+            raise RuntimeError("Could not record the object in any shard?")
+        return res[0]
 
     def list_signatures(
         self, after_id: Optional[bytes] = None, limit: Optional[int] = None
     ) -> Iterator[bytes]:
+        """List ``limit`` known object ids after ``after_id``."""
         with self.pool.connection() as db:
             cur = db.execute(
                 """SELECT signature
@@ -519,7 +611,8 @@ class SharedBase(Database):
             for row in cur:
                 yield row[0]
 
-    def delete(self, obj_id):
+    def delete(self, obj_id: bytes):
+        """Mark ``obj_id`` for deletion."""
         with self.pool.connection() as db:
             db.execute(
                 "UPDATE signature2shard SET state = 'deleted' WHERE signature = %s",
@@ -527,6 +620,12 @@ class SharedBase(Database):
             )
 
     def deleted_objects(self) -> Iterator[Tuple[bytes, str, ShardState]]:
+        """List all objects marked for deletion, with the name and state of the
+        shard in which the object is stored.
+
+        Returns:
+          an iterator over ``object_id``, shard name, :py:class:`ShardState` tuples
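+
+        A cleanup pass over deleted objects might look like (sketch)::
+
+            for obj_id, shard_name, state in base.deleted_objects():
+                ...  # remove the object bytes from the shard itself, then:
+                base.clean_deleted_object(obj_id)
+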
+        """
         with self.pool.connection() as db:
             cur = db.execute(
                 """SELECT signature, shards.name, shards.state
@@ -539,5 +638,6 @@ class SharedBase(Database):
                 yield bytes(signature), name, ShardState(state)
 
     def clean_deleted_object(self, obj_id) -> None:
+        """Remove the reference to the deleted object ``obj_id``."""
         with self.pool.connection() as db:
             db.execute("DELETE FROM signature2shard WHERE signature = %s", (obj_id,))
-- 
GitLab