diff --git a/PKG-INFO b/PKG-INFO index b9ab27a054ebf838c3ada18c803f18b5d3f70878..40ef69a1169f73cd273e1a429515b5101519b628 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.scrubber -Version: 0.0.2 +Version: 0.0.3 Summary: Software Heritage Datastore Scrubber Home-page: https://forge.softwareheritage.org/diffusion/swh-scrubber Author: Software Heritage developers diff --git a/requirements.txt b/requirements.txt index 54ce666057b0069476f0c3d554c84e75efa27614..e35fb35aaf1292b7ab52e4c28ce21f69958d7585 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ # should match https://pypi.python.org/pypi names. For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html +dulwich diff --git a/swh.scrubber.egg-info/PKG-INFO b/swh.scrubber.egg-info/PKG-INFO index b9ab27a054ebf838c3ada18c803f18b5d3f70878..40ef69a1169f73cd273e1a429515b5101519b628 100644 --- a/swh.scrubber.egg-info/PKG-INFO +++ b/swh.scrubber.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.scrubber -Version: 0.0.2 +Version: 0.0.3 Summary: Software Heritage Datastore Scrubber Home-page: https://forge.softwareheritage.org/diffusion/swh-scrubber Author: Software Heritage developers diff --git a/swh.scrubber.egg-info/SOURCES.txt b/swh.scrubber.egg-info/SOURCES.txt index c65cc279873e5c2fb129a24bd2e29168ec397a4b..964ff870ff4a560ff44df1393617dbe15d550c6a 100644 --- a/swh.scrubber.egg-info/SOURCES.txt +++ b/swh.scrubber.egg-info/SOURCES.txt @@ -48,6 +48,7 @@ swh/scrubber/tests/__init__.py swh/scrubber/tests/conftest.py swh/scrubber/tests/test_cli.py swh/scrubber/tests/test_fixer.py +swh/scrubber/tests/test_init.py swh/scrubber/tests/test_journal_kafka.py swh/scrubber/tests/test_origin_locator.py swh/scrubber/tests/test_storage_postgresql.py \ No newline at end of file diff --git a/swh.scrubber.egg-info/requires.txt b/swh.scrubber.egg-info/requires.txt index 6d9945b93bf96ee682e4900b53699a1e440b70ba..a52c064036407b23442497c549096d7b86c790ff 100644 --- a/swh.scrubber.egg-info/requires.txt +++ b/swh.scrubber.egg-info/requires.txt @@ -1,3 +1,4 @@ +dulwich swh.core[http]>=0.3 swh.loader.git>=1.4.0 swh.model>=5.0.0 diff --git a/swh/scrubber/__init__.py b/swh/scrubber/__init__.py index 2527e351951f2a6b215f9501fff4530f2e94aea3..1c4c0a22f7e7eb2b530eb3c75c41198aec6fe250 100644 --- a/swh/scrubber/__init__.py +++ b/swh/scrubber/__init__.py @@ -8,14 +8,16 @@ from __future__ import annotations from typing import TYPE_CHECKING if TYPE_CHECKING: - from .db import ScrubberDb + from swh.scrubber.db import ScrubberDb def get_scrubber_db(cls: str, **kwargs) -> ScrubberDb: - if cls != "local": - raise ValueError(f"Unknown scrubber db class '{cls}', use 'local' instead.") + if cls not in ("local", "postgresql"): + raise ValueError( + f"Unknown scrubber db class '{cls}', use 'postgresql' instead." + ) - from .db import ScrubberDb + from swh.scrubber.db import ScrubberDb return ScrubberDb.connect(kwargs.pop("db"), **kwargs) diff --git a/swh/scrubber/db.py b/swh/scrubber/db.py index c319497050d9b267c596cb54020682f6a012eb7e..d11b7a0569b012ef0ab21cbc7480b8b60dac5b75 100644 --- a/swh/scrubber/db.py +++ b/swh/scrubber/db.py @@ -45,7 +45,7 @@ class FixedObject: class ScrubberDb(BaseDb): - current_version = 1 + current_version = 2 @functools.lru_cache(1000) def datastore_get_or_add(self, datastore: Datastore) -> int: diff --git a/swh/scrubber/tests/test_cli.py b/swh/scrubber/tests/test_cli.py index b54a5c879efbd82d380691c17a7d4f9629e5c5ea..4b8523717a3597ada30dd24a94f780fd6ac1a380 100644 --- a/swh/scrubber/tests/test_cli.py +++ b/swh/scrubber/tests/test_cli.py @@ -25,7 +25,7 @@ def invoke( runner = CliRunner() config = { - "scrubber_db": {"cls": "local", "db": scrubber_db.conn.dsn}, + "scrubber_db": {"cls": "postgresql", "db": scrubber_db.conn.dsn}, "graph": {"url": "http://graph.example.org:5009/"}, } if storage: @@ -72,7 +72,7 @@ def test_check_storage(mocker, scrubber_db, swh_storage): assert result.exit_code == 0, result.output assert result.output == "" - get_scrubber_db.assert_called_once_with(cls="local", db=scrubber_db.conn.dsn) + get_scrubber_db.assert_called_once_with(cls="postgresql", db=scrubber_db.conn.dsn) StorageChecker.assert_called_once_with( db=scrubber_db, storage=StorageChecker.mock_calls[0][2]["storage"], @@ -103,7 +103,7 @@ def test_check_journal( assert result.exit_code == 0, result.output assert result.output == "" - get_scrubber_db.assert_called_once_with(cls="local", db=scrubber_db.conn.dsn) + get_scrubber_db.assert_called_once_with(cls="postgresql", db=scrubber_db.conn.dsn) JournalChecker.assert_called_once_with( db=scrubber_db, journal_client={ @@ -129,7 +129,7 @@ def test_locate_origins(mocker, scrubber_db, swh_storage): assert result.exit_code == 0, result.output assert result.output == "" - get_scrubber_db.assert_called_once_with(cls="local", db=scrubber_db.conn.dsn) + get_scrubber_db.assert_called_once_with(cls="postgresql", db=scrubber_db.conn.dsn) OriginLocator.assert_called_once_with( db=scrubber_db, storage=OriginLocator.mock_calls[0][2]["storage"], @@ -150,7 +150,7 @@ def test_fix_objects(mocker, scrubber_db): assert result.exit_code == 0, result.output assert result.output == "" - get_scrubber_db.assert_called_once_with(cls="local", db=scrubber_db.conn.dsn) + get_scrubber_db.assert_called_once_with(cls="postgresql", db=scrubber_db.conn.dsn) Fixer.assert_called_once_with( db=scrubber_db, start_object=CoreSWHID.from_string("swh:1:cnt:" + "00" * 20), diff --git a/swh/scrubber/tests/test_init.py b/swh/scrubber/tests/test_init.py new file mode 100644 index 0000000000000000000000000000000000000000..1f80823b0dd9cd73b754521c0d402139bc65bc45 --- /dev/null +++ b/swh/scrubber/tests/test_init.py @@ -0,0 +1,33 @@ +# Copyright (C) 2020-2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from typing import Any + +import pytest + +from swh.scrubber import get_scrubber_db + + +@pytest.mark.parametrize("clz", ["local", "postgresql"]) +def test_get_scrubber_db(mocker, clz): + mock_scrubber = mocker.patch("swh.scrubber.db.ScrubberDb") + + def test_connect(db_str: str, **kwargs) -> Any: + return "connection-result" + + mock_scrubber.connect.side_effect = test_connect + + actual_result = get_scrubber_db(clz, db="service=scrubber-db") + + assert mock_scrubber.connect.called is True + assert actual_result == "connection-result" + + +@pytest.mark.parametrize("clz", ["something", "anything"]) +def test_get_scrubber_db_raise(clz): + assert clz not in ["local", "postgresql"] + + with pytest.raises(ValueError, match="Unknown"): + get_scrubber_db(clz, db="service=scrubber-db")