Commit f81db177 authored by Jenkins for Software Heritage

New upstream version 0.1.0

parents c7d2b74b 36d16bcd
Tags debian/upstream/0.1.0
Metadata-Version: 2.1
Name: swh.scrubber
Version: 0.0.6
Version: 0.1.0
Summary: Software Heritage Datastore Scrubber
Home-page: https://forge.softwareheritage.org/diffusion/swh-scrubber
Author: Software Heritage developers
......
......@@ -45,6 +45,7 @@ swh/scrubber/sql/20-enums.sql
swh/scrubber/sql/30-schema.sql
swh/scrubber/sql/60-indexes.sql
swh/scrubber/sql/upgrades/2.sql
swh/scrubber/sql/upgrades/3.sql
swh/scrubber/tests/__init__.py
swh/scrubber/tests/conftest.py
swh/scrubber/tests/test_cli.py
......
......@@ -7,7 +7,7 @@
import dataclasses
import datetime
import functools
from typing import Iterator, List, Optional
from typing import Iterable, Iterator, List, Optional
import psycopg2
......@@ -36,6 +36,21 @@ class CorruptObject:
object_: bytes
@dataclasses.dataclass(frozen=True)
class MissingObject:
id: CoreSWHID
datastore: Datastore
first_occurrence: datetime.datetime
@dataclasses.dataclass(frozen=True)
class MissingObjectReference:
missing_id: CoreSWHID
reference_id: CoreSWHID
datastore: Datastore
first_occurrence: datetime.datetime
@dataclasses.dataclass(frozen=True)
class FixedObject:
id: CoreSWHID
......@@ -45,7 +60,11 @@ class FixedObject:
class ScrubberDb(BaseDb):
current_version = 2
current_version = 3
####################################
# Shared tables
####################################
@functools.lru_cache(1000)
def datastore_get_or_add(self, datastore: Datastore) -> int:
......@@ -74,9 +93,15 @@ class ScrubberDb(BaseDb):
""",
(dataclasses.asdict(datastore)),
)
(id_,) = cur.fetchone()
res = cur.fetchone()
assert res is not None
(id_,) = res
return id_
####################################
# Inventory of objects with issues
####################################
def corrupt_object_add(
self,
id: CoreSWHID,
......@@ -254,6 +279,113 @@ class ScrubberDb(BaseDb):
)
return self._corrupt_object_list_from_cursor(cur)
def missing_object_add(
self,
id: CoreSWHID,
reference_ids: Iterable[CoreSWHID],
datastore: Datastore,
) -> None:
"""
Adds a "hole" to the inventory, ie. an object missing from a datastore
that is referenced by an other object of the same datastore.
If the missing object is already known to be missing by the scrubber database,
this only records the reference (which can be useful to locate an origin
to recover the object from).
If that reference is already known too, this is a noop.
Args:
id: SWHID of the missing object (the hole)
reference_ids: SWHIDs of the objects referencing the missing object
datastore: representation of the swh-storage/swh-journal/... instance
containing this hole
"""
if not reference_ids:
raise ValueError("reference_ids is empty")
datastore_id = self.datastore_get_or_add(datastore)
with self.transaction() as cur:
cur.execute(
"""
INSERT INTO missing_object (id, datastore)
VALUES (%s, %s)
ON CONFLICT DO NOTHING
""",
(str(id), datastore_id),
)
psycopg2.extras.execute_batch(
cur,
"""
INSERT INTO missing_object_reference (missing_id, reference_id, datastore)
VALUES (%s, %s, %s)
ON CONFLICT DO NOTHING
""",
[
(str(id), str(reference_id), datastore_id)
for reference_id in reference_ids
],
)
def missing_object_iter(self) -> Iterator[MissingObject]:
"""Yields all records in the 'missing_object' table."""
with self.transaction() as cur:
cur.execute(
"""
SELECT
mo.id, mo.first_occurrence,
ds.package, ds.class, ds.instance
FROM missing_object AS mo
INNER JOIN datastore AS ds ON (ds.id=mo.datastore)
"""
)
for row in cur:
(id, first_occurrence, ds_package, ds_class, ds_instance) = row
yield MissingObject(
id=CoreSWHID.from_string(id),
first_occurrence=first_occurrence,
datastore=Datastore(
package=ds_package, cls=ds_class, instance=ds_instance
),
)
def missing_object_reference_iter(
self, missing_id: CoreSWHID
) -> Iterator[MissingObjectReference]:
"""Yields all records in the 'missing_object_reference' table."""
with self.transaction() as cur:
cur.execute(
"""
SELECT
mor.reference_id, mor.first_occurrence,
ds.package, ds.class, ds.instance
FROM missing_object_reference AS mor
INNER JOIN datastore AS ds ON (ds.id=mor.datastore)
WHERE mor.missing_id=%s
""",
(str(missing_id),),
)
for row in cur:
(
reference_id,
first_occurrence,
ds_package,
ds_class,
ds_instance,
) = row
yield MissingObjectReference(
missing_id=missing_id,
reference_id=CoreSWHID.from_string(reference_id),
first_occurrence=first_occurrence,
datastore=Datastore(
package=ds_package, cls=ds_class, instance=ds_instance
),
)
####################################
# Issue resolution
####################################
def object_origin_add(
self, cur: psycopg2.extensions.cursor, swhid: CoreSWHID, origins: List[str]
) -> None:
......
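For illustration, a minimal sketch of the new missing-object API; the DSN, the construction via ScrubberDb.connect(), and the datastore values are assumptions for the example, not part of this diff:

from swh.model.swhids import CoreSWHID
from swh.scrubber.db import Datastore, ScrubberDb

db = ScrubberDb.connect("dbname=scrubber")  # hypothetical DSN/constructor
datastore = Datastore(package="storage", cls="postgresql", instance="dbname=storage")
missing = CoreSWHID.from_string("swh:1:dir:" + "00" * 20)   # the hole
referrer = CoreSWHID.from_string("swh:1:rev:" + "11" * 20)  # object pointing at it

# Record the hole and its referencing object; re-adding either is a no-op.
db.missing_object_add(missing, [referrer], datastore)

# Enumerate known holes, then the references to one of them.
for mo in db.missing_object_iter():
    print(mo.id, mo.first_occurrence)
for ref in db.missing_object_reference_iter(missing):
    print(ref.missing_id, ref.reference_id)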
-------------------------------------
-- Shared definitions
-------------------------------------
create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
create table datastore
......@@ -14,6 +18,11 @@ comment on column datastore.package is 'Name of the component using this datasto
comment on column datastore.class is 'For datastores with multiple backends, name of the backend (postgresql/cassandra for storage, kafka for journal, pathslicer/azure/winery/... for objstorage)';
comment on column datastore.instance is 'Human-readable way to uniquely identify the datastore; eg. its URL or DSN.';
-------------------------------------
-- Inventory of objects with issues
-------------------------------------
create table corrupt_object
(
id swhid not null,
......@@ -27,6 +36,37 @@ comment on column corrupt_object.datastore is 'Datastore the corrupt object was
comment on column corrupt_object.object is 'Corrupt object, as found in the datastore (possibly msgpack-encoded, using the journal''s serializer)';
comment on column corrupt_object.first_occurrence is 'Moment the object was found to be corrupt for the first time';
create table missing_object
(
id swhid not null,
datastore int not null,
first_occurrence timestamptz not null default now()
);
comment on table missing_object is 'Each row identifies an object that is missing but referenced by another object (aka a "hole")';
comment on column missing_object.datastore is 'Datastore where the hole is.';
comment on column missing_object.first_occurrence is 'Moment the object was found to be missing for the first time';
create table missing_object_reference
(
missing_id swhid not null,
reference_id swhid not null,
datastore int not null,
first_occurrence timestamptz not null default now()
);
comment on table missing_object_reference is 'Each row identifies an object that points to an object that does not exist (aka a "hole")';
comment on column missing_object_reference.missing_id is 'SWHID of the missing object.';
comment on column missing_object_reference.reference_id is 'SWHID of the object referencing the missing object.';
comment on column missing_object_reference.datastore is 'Datastore where the referencing object is.';
comment on column missing_object_reference.first_occurrence is 'Moment the object was found to reference a missing object';
-------------------------------------
-- Issue resolution
-------------------------------------
create table object_origin
(
object_id swhid not null,
......
-------------------------------------
-- Shared tables
-------------------------------------
-- datastore
create unique index concurrently datastore_pkey on datastore(id);
......@@ -6,6 +10,10 @@ alter table datastore add primary key using index datastore_pkey;
create unique index concurrently datastore_package_class_instance on datastore(package, class, instance);
-------------------------------------
-- Inventory of objects with issues
-------------------------------------
-- corrupt_object
alter table corrupt_object add constraint corrupt_object_datastore_fkey foreign key (datastore) references datastore(id) not valid;
......@@ -14,6 +22,28 @@ alter table corrupt_object validate constraint corrupt_object_datastore_fkey;
create unique index concurrently corrupt_object_pkey on corrupt_object(id, datastore);
alter table corrupt_object add primary key using index corrupt_object_pkey;
-- missing_object
alter table missing_object add constraint missing_object_datastore_fkey foreign key (datastore) references datastore(id) not valid;
alter table missing_object validate constraint missing_object_datastore_fkey;
create unique index concurrently missing_object_pkey on missing_object(id, datastore);
alter table missing_object add primary key using index missing_object_pkey;
-- missing_object_reference
alter table missing_object_reference add constraint missing_object_reference_datastore_fkey foreign key (datastore) references datastore(id) not valid;
alter table missing_object_reference validate constraint missing_object_reference_datastore_fkey;
create unique index concurrently missing_object_reference_missing_id_reference_id_datastore on missing_object_reference(missing_id, reference_id, datastore);
create unique index concurrently missing_object_reference_reference_id_missing_id_datastore on missing_object_reference(reference_id, missing_id, datastore);
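-- Note: the two unique indexes above cover (missing_id, reference_id) in both column
-- orders, so lookups by the missing object ("who references this hole?") and by the
-- referencing object ("which holes does this object point to?") can each use an index.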
-------------------------------------
-- Issue resolution
-------------------------------------
-- object_origin
create unique index concurrently object_origin_pkey on object_origin (object_id, origin_url);
......
-- SWH Scrubber DB schema upgrade
-- from_version: 2
-- to_version: 3
-- description: Add missing_object
create table missing_object
(
id swhid not null,
datastore int not null,
first_occurrence timestamptz not null default now()
);
comment on table missing_object is 'Each row identifies an object that is missing but referenced by another object (aka a "hole")';
comment on column missing_object.datastore is 'Datastore where the hole is.';
comment on column missing_object.first_occurrence is 'Moment the object was found to be missing for the first time';
create table missing_object_reference
(
missing_id swhid not null,
reference_id swhid not null,
datastore int not null,
first_occurrence timestamptz not null default now()
);
comment on table missing_object_reference is 'Each row identifies an object that points to an object that does not exist (aka a "hole")';
comment on column missing_object_reference.missing_id is 'SWHID of the missing object.';
comment on column missing_object_reference.reference_id is 'SWHID of the object referencing the missing object.';
comment on column missing_object_reference.datastore is 'Datastore where the referencing object is.';
comment on column missing_object_reference.first_occurrence is 'Moment the object was found to reference a missing object';
alter table missing_object add constraint missing_object_datastore_fkey foreign key (datastore) references datastore(id) not valid;
alter table missing_object validate constraint missing_object_datastore_fkey;
create unique index concurrently missing_object_pkey on missing_object(id, datastore);
alter table missing_object add primary key using index missing_object_pkey;
alter table missing_object_reference add constraint missing_object_reference_datastore_fkey foreign key (datastore) references datastore(id) not valid;
alter table missing_object_reference validate constraint missing_object_reference_datastore_fkey;
create unique index concurrently missing_object_reference_missing_id_reference_id_datastore on missing_object_reference(missing_id, reference_id, datastore);
create unique index concurrently missing_object_reference_reference_id_missing_id_datastore on missing_object_reference(reference_id, missing_id, datastore);
......@@ -5,13 +5,24 @@
"""Reads all objects in a swh-storage instance and recomputes their checksums."""
import collections
import contextlib
import dataclasses
import logging
from typing import Iterable, Union
from swh.core.statsd import Statsd
from swh.journal.serializers import value_to_kafka
from swh.model.model import Directory, Release, Revision, Snapshot
from swh.model import swhids
from swh.model.model import (
Content,
Directory,
ObjectType,
Release,
Revision,
Snapshot,
TargetType,
)
from swh.storage import backfill
from swh.storage.interface import StorageInterface
from swh.storage.postgresql.storage import Storage as PostgresqlStorage
......@@ -20,7 +31,7 @@ from .db import Datastore, ScrubberDb
logger = logging.getLogger(__name__)
ScrubbableObject = Union[Revision, Release, Snapshot, Directory]
ScrubbableObject = Union[Revision, Release, Snapshot, Directory, Content]
@contextlib.contextmanager
......@@ -47,6 +58,7 @@ class StorageChecker:
"""maximum value of the hexdigest of the object's sha1."""
_datastore = None
_statsd = None
def datastore_info(self) -> Datastore:
"""Returns a :class:`Datastore` instance representing the swh-storage instance
......@@ -65,6 +77,14 @@ class StorageChecker:
)
return self._datastore
def statsd(self) -> Statsd:
if self._statsd is None:
self._statsd = Statsd(
namespace="swh_scrubber",
constant_tags={"object_type": self.object_type},
)
return self._statsd
def run(self):
"""Runs on all objects of ``object_type`` and with id between
``start_object`` and ``end_object``.
......@@ -81,7 +101,7 @@ class StorageChecker:
for range_start, range_end in backfill.RANGE_GENERATORS[self.object_type](
self.start_object, self.end_object
):
logger.info(
logger.debug(
"Processing %s range %s to %s",
self.object_type,
backfill._format_range_bound(range_start),
......@@ -93,14 +113,168 @@ class StorageChecker:
)
objects = list(objects)
self.process_objects(objects)
with self.statsd().timed(
"batch_duration_seconds", tags={"operation": "check_hashes"}
):
self.check_object_hashes(objects)
with self.statsd().timed(
"batch_duration_seconds", tags={"operation": "check_references"}
):
self.check_object_references(objects)
def process_objects(self, objects: Iterable[ScrubbableObject]):
def check_object_hashes(self, objects: Iterable[ScrubbableObject]):
"""Recomputes hashes, and reports mismatches."""
count = 0
for object_ in objects:
if isinstance(object_, Content):
# TODO
continue
real_id = object_.compute_hash()
count += 1
if object_.id != real_id:
self.statsd().increment("hash_mismatch_total")
self.db.corrupt_object_add(
object_.swhid(),
self.datastore_info(),
value_to_kafka(object_.to_dict()),
)
if count:
self.statsd().increment("objects_hashed_total", count)
def check_object_references(self, objects: Iterable[ScrubbableObject]):
"""Check all objects references by these objects exist."""
cnt_references = collections.defaultdict(set)
dir_references = collections.defaultdict(set)
rev_references = collections.defaultdict(set)
rel_references = collections.defaultdict(set)
snp_references = collections.defaultdict(set)
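# First pass: for each target object type, collect which SWHIDs reference each target.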
for object_ in objects:
swhid = object_.swhid()
if isinstance(object_, Content):
pass
elif isinstance(object_, Directory):
for entry in object_.entries:
if entry.type == "file":
cnt_references[entry.target].add(swhid)
elif entry.type == "dir":
dir_references[entry.target].add(swhid)
elif entry.type == "rev":
# dir->rev holes are not considered a problem because they
# happen whenever git submodules point to repositories that
# were not loaded yet; ignore them
pass
else:
assert False, entry
elif isinstance(object_, Revision):
dir_references[object_.directory].add(swhid)
for parent in object_.parents:
rev_references[parent].add(swhid)
elif isinstance(object_, Release):
if object_.target is None:
pass
elif object_.target_type == ObjectType.CONTENT:
cnt_references[object_.target].add(swhid)
elif object_.target_type == ObjectType.DIRECTORY:
dir_references[object_.target].add(swhid)
elif object_.target_type == ObjectType.REVISION:
rev_references[object_.target].add(swhid)
elif object_.target_type == ObjectType.RELEASE:
rel_references[object_.target].add(swhid)
else:
assert False, object_
elif isinstance(object_, Snapshot):
for branch in object_.branches.values():
if branch is None:
pass
elif branch.target_type == TargetType.ALIAS:
pass
elif branch.target_type == TargetType.CONTENT:
cnt_references[branch.target].add(swhid)
elif branch.target_type == TargetType.DIRECTORY:
dir_references[branch.target].add(swhid)
elif branch.target_type == TargetType.REVISION:
rev_references[branch.target].add(swhid)
elif branch.target_type == TargetType.RELEASE:
rel_references[branch.target].add(swhid)
elif branch.target_type == TargetType.SNAPSHOT:
snp_references[branch.target].add(swhid)
else:
assert False, (str(object_.swhid()), branch)
else:
assert False, object_.swhid()
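# Second pass: ask the storage backend which of the referenced objects are missing.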
missing_cnts = set(
self.storage.content_missing_per_sha1_git(list(cnt_references))
)
missing_dirs = set(self.storage.directory_missing(list(dir_references)))
missing_revs = set(self.storage.revision_missing(list(rev_references)))
missing_rels = set(self.storage.release_missing(list(rel_references)))
missing_snps = set(self.storage.snapshot_missing(list(snp_references)))
self.statsd().increment(
"missing_object_total",
len(missing_cnts),
tags={"target_object_type": "content"},
)
self.statsd().increment(
"missing_object_total",
len(missing_dirs),
tags={"target_object_type": "directory"},
)
self.statsd().increment(
"missing_object_total",
len(missing_revs),
tags={"target_object_type": "revision"},
)
self.statsd().increment(
"missing_object_total",
len(missing_rels),
tags={"target_object_type": "release"},
)
self.statsd().increment(
"missing_object_total",
len(missing_snps),
tags={"target_object_type": "snapshot"},
)
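# Record each hole together with the SWHIDs of the objects that reference it.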
for missing_id in missing_cnts:
missing_swhid = swhids.CoreSWHID(
object_type=swhids.ObjectType.CONTENT, object_id=missing_id
)
self.db.missing_object_add(
missing_swhid, cnt_references[missing_id], self.datastore_info()
)
for missing_id in missing_dirs:
missing_swhid = swhids.CoreSWHID(
object_type=swhids.ObjectType.DIRECTORY, object_id=missing_id
)
self.db.missing_object_add(
missing_swhid, dir_references[missing_id], self.datastore_info()
)
for missing_id in missing_revs:
missing_swhid = swhids.CoreSWHID(
object_type=swhids.ObjectType.REVISION, object_id=missing_id
)
self.db.missing_object_add(
missing_swhid, rev_references[missing_id], self.datastore_info()
)
for missing_id in missing_rels:
missing_swhid = swhids.CoreSWHID(
object_type=swhids.ObjectType.RELEASE, object_id=missing_id
)
self.db.missing_object_add(
missing_swhid, rel_references[missing_id], self.datastore_info()
)
for missing_id in missing_snps:
missing_swhid = swhids.CoreSWHID(
object_type=swhids.ObjectType.SNAPSHOT, object_id=missing_id
)
self.db.missing_object_add(
missing_swhid, snp_references[missing_id], self.datastore_info()
)
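For context, a minimal sketch of driving the checker over a whole hash range, mirroring the invocation used by the tests below; scrubber_db and storage are placeholders for a ScrubberDb and an swh-storage instance:

from swh.scrubber.storage_checker import StorageChecker

StorageChecker(
    db=scrubber_db,    # placeholder: a ScrubberDb instance
    storage=storage,   # placeholder: an swh-storage instance
    object_type="snapshot",
    start_object="00" * 20,
    end_object="ff" * 20,
).run()  # checks hashes and references for every snapshot in the range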
......@@ -8,15 +8,14 @@ from functools import partial
import pytest
from pytest_postgresql import factories
from swh.core.db.pytest_plugin import initialize_database_for_module, postgresql_fact
from swh.core.db.pytest_plugin import initialize_database_for_module
from swh.scrubber.db import ScrubberDb
scrubber_postgresql_proc = factories.postgresql_proc(
dbname="scrubber",
load=[partial(initialize_database_for_module, modname="scrubber", version=1)],
)
postgresql_scrubber = postgresql_fact("scrubber_postgresql_proc")
postgresql_scrubber = factories.postgresql("scrubber_postgresql_proc")
@pytest.fixture
......
......@@ -10,12 +10,55 @@ import attr
import pytest
from swh.journal.serializers import kafka_to_value
from swh.model import swhids
from swh.model import model, swhids
from swh.model.tests import swh_model_data
from swh.scrubber.storage_checker import StorageChecker
from swh.storage.backfill import byte_ranges
# decorator to make swh.storage.backfill use less ranges, so tests run faster
CONTENT1 = model.Content.from_data(b"foo")
DIRECTORY1 = model.Directory(
entries=(
model.DirectoryEntry(
target=CONTENT1.sha1_git, type="file", name=b"file1", perms=0o1
),
)
)
DIRECTORY2 = model.Directory(
entries=(
model.DirectoryEntry(
target=CONTENT1.sha1_git, type="file", name=b"file2", perms=0o1
),
model.DirectoryEntry(target=DIRECTORY1.id, type="dir", name=b"dir1", perms=0o1),
model.DirectoryEntry(target=b"\x00" * 20, type="rev", name=b"rev1", perms=0o1),
)
)
REVISION1 = model.Revision(
message=b"blah",
directory=DIRECTORY2.id,
author=None,
committer=None,
date=None,
committer_date=None,
type=model.RevisionType.GIT,
synthetic=True,
)
RELEASE1 = model.Release(
message=b"blih",
name=b"bluh",
target_type=model.ObjectType.REVISION,
target=REVISION1.id,
synthetic=True,
)
SNAPSHOT1 = model.Snapshot(
branches={
b"rel1": model.SnapshotBranch(
target_type=model.TargetType.RELEASE, target=RELEASE1.id
),
}
)
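# The fixtures above form a reference chain: SNAPSHOT1 -> RELEASE1 -> REVISION1
# -> DIRECTORY2 -> {DIRECTORY1, CONTENT1}, and DIRECTORY1 -> CONTENT1, so omitting
# any one of them from storage creates a "hole" visible from its referrers.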
# decorator to make swh.storage.backfill use fewer ranges, so tests run faster
patch_byte_ranges = unittest.mock.patch(
"swh.storage.backfill.byte_ranges",
lambda numbits, start, end: byte_ranges(numbits // 8, start, end),
......@@ -44,6 +87,7 @@ def test_no_corruption(scrubber_db, swh_storage):
@pytest.mark.parametrize("corrupt_idx", range(len(swh_model_data.SNAPSHOTS)))
@patch_byte_ranges
def test_corrupt_snapshot(scrubber_db, swh_storage, corrupt_idx):
storage_dsn = swh_storage.get_db().conn.dsn
snapshots = list(swh_model_data.SNAPSHOTS)
snapshots[corrupt_idx] = attr.evolve(snapshots[corrupt_idx], id=b"\x00" * 20)
swh_storage.snapshot_add(snapshots)
......@@ -66,9 +110,7 @@ def test_corrupt_snapshot(scrubber_db, swh_storage, corrupt_idx):
)
assert corrupt_objects[0].datastore.package == "storage"
assert corrupt_objects[0].datastore.cls == "postgresql"
assert corrupt_objects[0].datastore.instance.startswith(
"user=postgres password=xxx dbname=storage host="
)
assert corrupt_objects[0].datastore.instance.startswith(storage_dsn)
assert (
before_date - datetime.timedelta(seconds=5)
<= corrupt_objects[0].first_occurrence
......@@ -143,3 +185,107 @@ def test_corrupt_snapshots_different_batches(scrubber_db, swh_storage):
"swh:1:snp:ffffffffffffffffffffffffffffffffffffffff",
]
}
@patch_byte_ranges
def test_no_hole(scrubber_db, swh_storage):
swh_storage.content_add([CONTENT1])
swh_storage.directory_add([DIRECTORY1, DIRECTORY2])
swh_storage.revision_add([REVISION1])
swh_storage.release_add([RELEASE1])
swh_storage.snapshot_add([SNAPSHOT1])
for object_type in ("snapshot", "release", "revision", "directory"):
StorageChecker(
db=scrubber_db,
storage=swh_storage,
object_type=object_type,
start_object="00" * 20,
end_object="ff" * 20,
).run()
assert list(scrubber_db.missing_object_iter()) == []
@pytest.mark.parametrize(
"missing_object",
["content1", "directory1", "directory2", "revision1", "release1"],
)
@patch_byte_ranges
def test_one_hole(scrubber_db, swh_storage, missing_object):
if missing_object == "content1":
missing_swhid = CONTENT1.swhid()
reference_swhids = [DIRECTORY1.swhid(), DIRECTORY2.swhid()]
else:
swh_storage.content_add([CONTENT1])
if missing_object == "directory1":
missing_swhid = DIRECTORY1.swhid()
reference_swhids = [DIRECTORY2.swhid()]
else:
swh_storage.directory_add([DIRECTORY1])
if missing_object == "directory2":
missing_swhid = DIRECTORY2.swhid()
reference_swhids = [REVISION1.swhid()]
else:
swh_storage.directory_add([DIRECTORY2])
if missing_object == "revision1":
missing_swhid = REVISION1.swhid()
reference_swhids = [RELEASE1.swhid()]
else:
swh_storage.revision_add([REVISION1])
if missing_object == "release1":
missing_swhid = RELEASE1.swhid()
reference_swhids = [SNAPSHOT1.swhid()]
else:
swh_storage.release_add([RELEASE1])
swh_storage.snapshot_add([SNAPSHOT1])
for object_type in ("snapshot", "release", "revision", "directory"):
StorageChecker(
db=scrubber_db,
storage=swh_storage,
object_type=object_type,
start_object="00" * 20,
end_object="ff" * 20,
).run()
assert [mo.id for mo in scrubber_db.missing_object_iter()] == [missing_swhid]
assert {
(mor.missing_id, mor.reference_id)
for mor in scrubber_db.missing_object_reference_iter(missing_swhid)
} == {(missing_swhid, reference_swhid) for reference_swhid in reference_swhids}
@patch_byte_ranges
def test_two_holes(scrubber_db, swh_storage):
# missing content and revision
swh_storage.directory_add([DIRECTORY1, DIRECTORY2])
swh_storage.release_add([RELEASE1])
swh_storage.snapshot_add([SNAPSHOT1])
for object_type in ("snapshot", "release", "revision", "directory"):
StorageChecker(
db=scrubber_db,
storage=swh_storage,
object_type=object_type,
start_object="00" * 20,
end_object="ff" * 20,
).run()
assert {mo.id for mo in scrubber_db.missing_object_iter()} == {
CONTENT1.swhid(),
REVISION1.swhid(),
}
assert {
mor.reference_id
for mor in scrubber_db.missing_object_reference_iter(CONTENT1.swhid())
} == {DIRECTORY1.swhid(), DIRECTORY2.swhid()}
assert {
mor.reference_id
for mor in scrubber_db.missing_object_reference_iter(REVISION1.swhid())
} == {RELEASE1.swhid()}