Skip to content
Snippets Groups Projects
Commit f9d316be authored by Jenkins for Software Heritage's avatar Jenkins for Software Heritage
Browse files

Merge tag 'debian/0.1.2-1_swh1' into debian/buster-swh

parents 2deb6bcb ba944d3c
No related branches found
No related tags found
No related merge requests found
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.4.0
rev: v4.3.0
hooks:
- id: trailing-whitespace
- id: check-json
- id: check-yaml
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
hooks:
- id: flake8
- repo: https://github.com/codespell-project/codespell
rev: v1.16.0
rev: v2.2.2
hooks:
- id: codespell
args: [-L mor]
......@@ -39,11 +39,11 @@ repos:
# types: [python]
- repo: https://github.com/PyCQA/isort
rev: 5.5.2
rev: 5.10.1
hooks:
- id: isort
- repo: https://github.com/python/black
rev: 22.3.0
rev: 22.10.0
hooks:
- id: black
Metadata-Version: 2.1
Name: swh.scrubber
Version: 0.1.1
Version: 0.1.2
Summary: Software Heritage Datastore Scrubber
Home-page: https://forge.softwareheritage.org/diffusion/swh-scrubber
Author: Software Heritage developers
......
swh-scrubber (0.1.1-1~swh1~bpo10+1) buster-swh; urgency=medium
swh-scrubber (0.1.2-1~swh1) unstable-swh; urgency=medium
* Rebuild for buster-swh
* New upstream release 0.1.2 - (tagged by Valentin Lorentz
<vlorentz@softwareheritage.org> on 2022-12-20 10:42:49 +0100)
* Upstream changes: - v0.1.2 - * docs: Include module indices
only when building standalone package doc - * sql: Fix typos
detected by codespell - * pre-commit, tox: Bump pre-commit,
codespell, black and flake8 - * storage_checker: Retry on
postgresql errors from swh-storage
-- Software Heritage autobuilder (on jenkins-debian1) <jenkins@jenkins-debian1.internal.softwareheritage.org> Mon, 17 Oct 2022 13:12:46 +0000
-- Software Heritage autobuilder (on jenkins-debian1) <jenkins@jenkins-debian1.internal.softwareheritage.org> Tue, 20 Dec 2022 09:46:50 +0000
swh-scrubber (0.1.1-1~swh1) unstable-swh; urgency=medium
......
......@@ -7,9 +7,11 @@
:caption: Contents:
Indices and tables
------------------
.. only:: standalone_package_doc
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
Indices and tables
------------------
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
......@@ -3,3 +3,5 @@
# dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
dulwich
psycopg2
tenacity
Metadata-Version: 2.1
Name: swh.scrubber
Version: 0.1.1
Version: 0.1.2
Summary: Software Heritage Datastore Scrubber
Home-page: https://forge.softwareheritage.org/diffusion/swh-scrubber
Author: Software Heritage developers
......
dulwich
psycopg2
tenacity
swh.core[http]>=0.3
swh.loader.git>=1.4.0
swh.model>=5.0.0
......
......@@ -31,7 +31,7 @@ create table checked_range
last_date timestamptz not null
);
comment on table checked_range is 'Each row represents a range of objects in a datastore that were fetched, checksumed, and checked at some point in the past.';
comment on table checked_range is 'Each row represents a range of objects in a datastore that were fetched, checksummed, and checked at some point in the past.';
comment on column checked_range.range_start is 'First SWHID of the range that was checked (inclusive, possibly non-existent).';
comment on column checked_range.range_end is 'Last SWHID of the range that was checked (inclusive, possibly non-existent).';
comment on column checked_range.last_date is 'Date the last scrub of that range *started*.';
......
......@@ -12,7 +12,7 @@ create table checked_range
last_date timestamptz not null
);
comment on table checked_range is 'Each row represents a range of objects in a datastore that were fetched, checksumed, and checked at some point in the past.';
comment on table checked_range is 'Each row represents a range of objects in a datastore that were fetched, checksummed, and checked at some point in the past.';
comment on column checked_range.range_start is 'First SWHID of the range that was checked (inclusive, possibly non-existent).';
comment on column checked_range.range_end is 'Last SWHID of the range that was checked (inclusive, possibly non-existent).';
comment on column checked_range.last_date is 'Date the last scrub of that range *started*.';
......
......@@ -12,6 +12,9 @@ import datetime
import logging
from typing import Iterable, Optional, Tuple, Union
import psycopg2
import tenacity
from swh.core.statsd import Statsd
from swh.journal.serializers import value_to_kafka
from swh.model import swhids
......@@ -128,14 +131,13 @@ class StorageChecker:
``start_object`` and ``end_object``.
"""
if isinstance(self.storage, PostgresqlStorage):
with storage_db(self.storage) as db:
return self._check_postgresql(db)
return self._check_postgresql()
else:
raise NotImplementedError(
f"StorageChecker(storage={self.storage!r}).check_storage()"
)
def _check_postgresql(self, db):
def _check_postgresql(self):
object_type = getattr(swhids.ObjectType, self.object_type.upper())
for range_start, range_end in backfill.RANGE_GENERATORS[self.object_type](
self.start_object, self.end_object
......@@ -172,6 +174,27 @@ class StorageChecker:
backfill._format_range_bound(range_end),
)
self._check_postgresql_range(object_type, range_start, range_end)
self.db.checked_range_upsert(
self.datastore_info(),
range_start_swhid,
range_end_swhid,
start_time,
)
@tenacity.retry(
retry=tenacity.retry_if_exception_type(psycopg2.OperationalError),
wait=tenacity.wait_random_exponential(min=10, max=180),
)
def _check_postgresql_range(
self, object_type: swhids.ObjectType, range_start, range_end
) -> None:
assert isinstance(
self.storage, PostgresqlStorage
), f"_check_postgresql_range called with self.storage={self.storage!r}"
with storage_db(self.storage) as db:
objects = backfill.fetch(
db, self.object_type, start=range_start, end=range_end
)
......@@ -186,13 +209,6 @@ class StorageChecker:
):
self.check_object_references(objects)
self.db.checked_range_upsert(
self.datastore_info(),
range_start_swhid,
range_end_swhid,
start_time,
)
def check_object_hashes(self, objects: Iterable[ScrubbableObject]):
"""Recomputes hashes, and reports mismatches."""
count = 0
......
......@@ -15,15 +15,16 @@ commands =
[testenv:black]
skip_install = true
deps =
black==22.3.0
black==22.10.0
commands =
{envpython} -m black --check swh
[testenv:flake8]
skip_install = true
deps =
flake8==4.0.1
flake8-bugbear==22.3.23
flake8==5.0.4
flake8-bugbear==22.9.23
pycodestyle==2.9.1
commands =
{envpython} -m flake8
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment