Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • anlambert/swh-loader-svn
  • lunar/swh-loader-svn
  • ardumont/swh-loader-svn
  • swh/devel/swh-loader-svn
  • douardda/swh-loader-svn
  • marmoute/swh-loader-svn
6 results
Show changes
Commits on Source (31)
Showing
with 770 additions and 221 deletions
# Changes here will be overwritten by Copier
_commit: v0.1.6
_commit: v0.3.3
_src_path: https://gitlab.softwareheritage.org/swh/devel/swh-py-template.git
description: Software Heritage Loader SVN
distribution_name: swh-loader-svn
......
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: check-json
- id: check-yaml
- repo: https://github.com/python/black
rev: 23.1.0
rev: 25.1.0
hooks:
- id: black
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
rev: 6.0.0
hooks:
- id: isort
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
rev: 7.1.1
hooks:
- id: flake8
additional_dependencies: [flake8-bugbear==22.9.23]
additional_dependencies: [flake8-bugbear==24.12.12, flake8-pyproject]
- repo: https://github.com/codespell-project/codespell
rev: v2.2.2
rev: v2.4.1
hooks:
- id: codespell
name: Check source code spelling
stages: [commit]
stages: [pre-commit]
- id: codespell
name: Check commit message spelling
stages: [commit-msg]
......@@ -41,4 +41,13 @@ repos:
pass_filenames: false
language: system
types: [python]
- id: twine-check
name: twine check
description: call twine check when pushing an annotated release tag
entry: bash -c "ref=$(git describe) &&
[[ $ref =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]] &&
(python3 -m build --sdist && twine check $(ls -t dist/* | head -1)) || true"
pass_filenames: false
stages: [pre-push]
language: python
additional_dependencies: [twine, build]
......@@ -6,7 +6,7 @@ In the interest of fostering an open and welcoming environment, we as Software
Heritage contributors and maintainers pledge to making participation in our
project and our community a harassment-free experience for everyone, regardless
of age, body size, disability, ethnicity, sex characteristics, gender identity
and expression, level of experience, education, socio-economic status,
and expression, level of experience, education, socioeconomic status,
nationality, personal appearance, race, religion, or sexual identity and
orientation.
......
[mypy]
namespace_packages = True
warn_unused_ignores = True
explicit_package_bases = True
# ^ Needed for mypy to detect py.typed from swh packages installed
# in editable mode
# 3rd party libraries without stubs (yet)
[mypy-celery.*]
ignore_missing_imports = True
[mypy-iso8601.*]
ignore_missing_imports = True
[mypy-pkg_resources.*]
ignore_missing_imports = True
[mypy-pytest.*]
ignore_missing_imports = True
[mypy-subvertpy.*]
ignore_missing_imports = True
......@@ -28,6 +28,7 @@ testing = {file = ["requirements.txt", "requirements-swh.txt", "requirements-tes
[project.entry-points."swh.workers"]
"loader.svn" = "swh.loader.svn:register"
"loader.svn-export" = "swh.loader.svn:register_export"
"loader.svn-no-dump" = "swh.loader.svn:register_no_dump"
[project.urls]
"Homepage" = "https://gitlab.softwareheritage.org/swh/devel/swh-loader-svn"
......@@ -44,7 +45,7 @@ build-backend = "setuptools.build_meta"
fallback_version = "0.0.1"
[tool.black]
target-version = ['py37']
target-version = ['py39', 'py310', 'py311', 'py312']
[tool.isort]
multi_line_output = 3
......@@ -55,3 +56,34 @@ ensure_newline_before_comments = true
line_length = 88
force_sort_within_sections = true
known_first_party = ['swh']
[tool.mypy]
namespace_packages = true
warn_unused_ignores = true
explicit_package_bases = true
# ^ Needed for mypy to detect py.typed from swh packages installed
# in editable mode
plugins = []
# 3rd party libraries without stubs (yet)
[[tool.mypy.overrides]]
module = [
"subvertpy.*",
]
ignore_missing_imports = true
[tool.flake8]
select = ["C", "E", "F", "W", "B950"]
ignore = [
"E203", # whitespaces before ':' <https://github.com/psf/black/issues/315>
"E231", # missing whitespace after ','
"E501", # line too long, use B950 warning from flake8-bugbear instead
"W503" # line break before binary operator <https://github.com/psf/black/issues/52>
]
max-line-length = 88
[tool.pytest.ini_options]
norecursedirs = "build docs .*"
asyncio_mode = "strict"
consider_namespace_packages = true
[pytest]
norecursedirs = build docs .*
asyncio_mode = strict
# Drop this when these fixtures aren't imported automatically
addopts = --import-mode=importlib -p no:pytest_swh_scheduler -p no:pytest_swh_storage
markers =
fs: execute tests that write to the filesystem
swh.storage >= 0.11.3
swh.model >= 6.6.0
swh.storage >= 2.4.1
swh.model >= 6.15.0
swh.scheduler >= 0.0.39
swh.loader.core >= 5.14.2
swh.loader.core >= 5.18.3
pytest
celery-types
pytest >= 8.1
pytest-mock
pytest-postgresql
swh.core[http] >= 0.0.61
swh.loader.core[testing] >= 5.18.1
types-click
types-python-dateutil
......@@ -4,5 +4,5 @@
click
iso8601
subvertpy >= 0.9.4
tenacity >= 6.2
tenacity >= 8.4.2
typing-extensions
[flake8]
# E203: whitespaces before ':' <https://github.com/psf/black/issues/315>
# E231: missing whitespace after ','
# E501: line too long, use B950 warning from flake8-bugbear instead
# W503: line break before binary operator <https://github.com/psf/black/issues/52>
select = C,E,F,W,B950
ignore = E203,E231,E501,W503
max-line-length = 88
......@@ -22,3 +22,12 @@ def register_export() -> Dict[str, Any]:
"task_modules": [],
"loader": SvnExportLoader,
}
def register_no_dump() -> Dict[str, Any]:
    """Entry-point hook registering the SVN loader variant that loads
    directly through remote access (i.e. without producing a dump file).

    Returns:
        a mapping with the celery task modules to register and the loader
        class to instantiate.
    """
    # imported lazily to keep package import side-effect free
    from swh.loader.svn.loader import SvnLoader

    registration: Dict[str, Any] = {
        "task_modules": [f"{__name__}.tasks"],
        "loader": SvnLoader,
    }
    return registration
# Copyright (C) 2023 The Software Heritage developers
# Copyright (C) 2023-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Loader in charge of injecting tree at a specific revision.
"""
"""Loader in charge of injecting tree at a specific revision."""
from datetime import datetime
import os
from pathlib import Path
from typing import Iterator, Optional
import tempfile
from typing import Iterator, List, Optional
from swh.loader.core.loader import BaseDirectoryLoader
from swh.loader.svn.svn_repo import SvnRepo, get_svn_repo
from swh.model.model import Snapshot, SnapshotBranch, TargetType
from swh.model.model import Snapshot, SnapshotBranch, SnapshotTargetType
class SvnExportLoader(BaseDirectoryLoader):
"""Svn export (of a tree) loader at a specific svn revision or tag (release) into
the swh archive.
"""Load a svn tree at a specific svn revision into the swh archive.
It is also possible to load a subset of the source tree by explicitly
specifying the sub-paths to export in the ``svn_paths`` optional parameter.
If the origin URL should be different from the subversion URL, the latter
can be provided using the optional ``svn_url`` parameter.
The output snapshot is of the form:
......@@ -36,13 +42,23 @@ class SvnExportLoader(BaseDirectoryLoader):
visit_type = "svn-export"
def __init__(self, *args, **kwargs):
def __init__(
self,
*args,
svn_paths: Optional[List[str]] = None,
svn_url: Optional[str] = None,
**kwargs,
):
self.svn_revision = kwargs.pop("ref")
self.svn_paths = svn_paths
super().__init__(*args, **kwargs)
self.svn_url = svn_url
if self.svn_url is None:
self.svn_url = self.origin.url
self.svnrepo: Optional[SvnRepo] = None
def prepare(self) -> None:
self.svnrepo = get_svn_repo(self.origin.url)
self.svnrepo = get_svn_repo(self.svn_url)
super().prepare()
def cleanup(self) -> None:
......@@ -53,8 +69,34 @@ class SvnExportLoader(BaseDirectoryLoader):
def fetch_artifact(self) -> Iterator[Path]:
"""Prepare the svn local repository checkout at a given commit/tag."""
assert self.svnrepo is not None
_, local_url = self.svnrepo.export_temporary(self.svn_revision)
yield Path(local_url.decode())
if self.svn_paths is None:
_, local_url = self.svnrepo.export_temporary(self.svn_revision)
yield Path(local_url.decode())
else:
assert self.svn_url is not None
self.log.debug(
"Exporting from the svn source tree rooted at %s@%s the sub-paths: %s",
self.svn_url,
self.svn_revision,
", ".join(self.svn_paths),
)
with tempfile.TemporaryDirectory(
suffix="-" + datetime.now().isoformat()
) as tmp_dir:
for svn_path in self.svn_paths:
svn_url = os.path.join(self.svn_url, svn_path.strip("/"))
export_path = os.path.join(tmp_dir, svn_path.strip("/"))
os.makedirs("/".join(export_path.split("/")[:-1]), exist_ok=True)
self.svnrepo.export(
svn_url,
export_path,
rev=int(self.svn_revision),
remove_dest_path=False,
overwrite=True,
ignore_externals=True,
ignore_keywords=True,
)
yield Path(tmp_dir)
def build_snapshot(self) -> Snapshot:
"""Build snapshot without losing the svn revision context."""
......@@ -63,11 +105,11 @@ class SvnExportLoader(BaseDirectoryLoader):
return Snapshot(
branches={
b"HEAD": SnapshotBranch(
target_type=TargetType.ALIAS,
target_type=SnapshotTargetType.ALIAS,
target=branch_name,
),
branch_name: SnapshotBranch(
target_type=TargetType.DIRECTORY,
target_type=SnapshotTargetType.DIRECTORY,
target=self.directory.hash,
),
}
......
# Copyright (C) 2015-2023 The Software Heritage developers
# Copyright (C) 2015-2025 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -16,11 +16,14 @@ import shutil
from subprocess import PIPE, Popen
import tempfile
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
from urllib.parse import urlparse, urlunparse
from subvertpy import SubversionException
from swh.loader.core.loader import BaseLoader
from swh.loader.core.utils import clean_dangling_folders
from swh.loader.exception import NotFound
from swh.loader.svn.svn_repo import get_svn_repo
from swh.loader.svn.svn_repo import SvnRepo, get_svn_repo
from swh.model import from_disk, hashutil
from swh.model.model import (
Content,
......@@ -29,7 +32,7 @@ from swh.model.model import (
SkippedContent,
Snapshot,
SnapshotBranch,
TargetType,
SnapshotTargetType,
)
from swh.storage.algos.snapshot import snapshot_get_latest
from swh.storage.interface import StorageInterface
......@@ -67,6 +70,7 @@ class SvnLoader(BaseLoader):
temp_directory: str = "/tmp",
debug: bool = False,
check_revision: int = 0,
check_revision_from: int = 0,
**kwargs: Any,
):
"""Load a svn repository (either remote or local).
......@@ -98,6 +102,7 @@ class SvnLoader(BaseLoader):
self.skip_post_load = False
# Revision check is configurable
self.check_revision = check_revision
self.check_revision_from = check_revision_from
# internal state used to store swh objects
self._contents: List[Content] = []
self._skipped_contents: List[SkippedContent] = []
......@@ -115,6 +120,41 @@ class SvnLoader(BaseLoader):
self.latest_snapshot = None
self.latest_revision: Optional[Revision] = None
def svn_repo(
    self, remote_url: str, origin_url: str, temp_dir: str
) -> Optional[SvnRepo]:
    """Instantiate a :class:`SvnRepo` for ``remote_url``, preferring the
    native ``svn://`` protocol when the URL uses http(s).

    For http(s) URLs, a candidate URL with the scheme swapped to ``svn``
    is tried first, as the svn protocol is much faster; the original URL
    is used as fallback. If every candidate fails, the last caught
    exception is re-raised.

    Raises:
        NotFound or SubversionException: when no candidate URL could be
            accessed.
    """
    parsed = urlparse(remote_url)
    if parsed.scheme.startswith("http"):
        # try the svn protocol first, then fall back to the original URL
        candidate_urls = [
            urlunparse(parsed._replace(scheme="svn")),
            remote_url,
        ]
    else:
        candidate_urls = [remote_url]

    repo: Optional[SvnRepo] = None
    last_error: Optional[Exception] = None
    for candidate_url in candidate_urls:
        try:
            repo = get_svn_repo(
                candidate_url,
                origin_url,
                temp_dir,
                self.max_content_size,
                debug=self.debug,
            )
        except (NotFound, SubversionException) as exc:
            # remember the failure and keep trying remaining candidates
            last_error = exc
        else:
            last_error = None
            break
    if last_error is not None:
        # every candidate URL failed: surface the last error
        raise last_error
    return repo
def pre_cleanup(self):
"""Cleanup potential dangling files from prior runs (e.g. OOM killed
tasks)
......@@ -174,7 +214,9 @@ Local repository not cleaned up for investigation: %s""",
"""
storage = self.storage
latest_snapshot = snapshot_get_latest(storage, origin_url)
latest_snapshot = snapshot_get_latest(
storage, origin_url, visit_type=self.visit_type
)
if not latest_snapshot:
return None
branches = latest_snapshot.branches
......@@ -183,7 +225,7 @@ Local repository not cleaned up for investigation: %s""",
branch = branches.get(DEFAULT_BRANCH)
if not branch:
return None
if branch.target_type != TargetType.REVISION:
if branch.target_type != SnapshotTargetType.REVISION:
return None
swh_id = branch.target
......@@ -317,23 +359,25 @@ Local repository not cleaned up for investigation: %s""",
# before the last revision to load)
if self.debug and dir_id == dir.hash:
for obj in checked_dir.iter_tree():
path = obj.data["path"].replace(checked_dir.data["path"], b"")
path = obj.data["path"].replace(
checked_dir.data["path"] + b"/", b""
)
if not path:
# ignore root directory
continue
if path not in dir:
self.log.debug(
"%s with path %s is missing in reconstructed repository filesystem",
obj.object_type, # type: ignore
obj.object_type,
path,
)
elif dir[path].hash != checked_dir[path].hash:
self.log.debug(
"%s with path %s has different hash in reconstructed repository filesystem", # noqa
obj.object_type, # type: ignore
obj.object_type,
path,
)
if obj.object_type == "content": # type: ignore
if obj.object_type == "content":
self.log.debug(
"expected sha1: %s, actual sha1: %s",
hashutil.hash_to_hex(checked_dir[path].data["sha1"]),
......@@ -358,9 +402,10 @@ Local repository not cleaned up for investigation: %s""",
ignore_keywords=True,
overwrite=True,
)
with open(export_path, "rb") as exported_file, open(
dir[path].data["path"], "rb"
) as checkout_file:
with (
open(export_path, "rb") as exported_file,
open(dir[path].data["path"], "rb") as checkout_file,
):
diff_lines = difflib.diff_bytes(
difflib.unified_diff,
exported_file.read().split(b"\n"),
......@@ -426,7 +471,7 @@ Local repository not cleaned up for investigation: %s""",
if (
self.check_revision
and self.check_revision != 0
and rev >= self.check_revision_from
and count % self.check_revision == 0
):
self._check_revision_divergence(rev, dir_id, root_directory)
......@@ -452,12 +497,10 @@ Local repository not cleaned up for investigation: %s""",
local_dirname = self._create_tmp_dir(self.temp_directory)
self.svnrepo = get_svn_repo(
self.svnrepo = self.svn_repo(
self.svn_url,
self.origin.url,
local_dirname,
self.max_content_size,
debug=self.debug,
)
try:
......@@ -562,7 +605,7 @@ Local repository not cleaned up for investigation: %s""",
snap = Snapshot(
branches={
DEFAULT_BRANCH: SnapshotBranch(
target=revision.id, target_type=TargetType.REVISION
target=revision.id, target_type=SnapshotTargetType.REVISION
)
}
)
......@@ -716,7 +759,7 @@ class SvnLoaderFromRemoteDump(SvnLoader):
return svn_revision
def dump_svn_revisions(
self, svn_url: str, last_loaded_svn_rev: int = -1
self, svn_url: str, max_rev: int = -1, last_loaded_svn_rev: int = -1
) -> Tuple[str, int]:
"""Generate a compressed subversion dump file using the svnrdump tool and gzip.
If the svnrdump command failed somehow, the produced dump file is analyzed to
......@@ -739,6 +782,8 @@ class SvnLoaderFromRemoteDump(SvnLoader):
"--password",
self.svnrepo.password,
]
if max_rev > 0:
svnrdump_cmd.append(f"-r0:{max_rev}")
# Launch the svnrdump command while capturing stderr as
# successfully dumped revision numbers are printed to it
......@@ -827,18 +872,32 @@ class SvnLoaderFromRemoteDump(SvnLoader):
# subversion origin and get the number of the last one
last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.origin.url)
self.svnrepo = get_svn_repo(
self.origin.url,
self.origin.url,
self.temp_dir,
self.max_content_size,
debug=self.debug,
)
self.svnrepo = self.svn_repo(self.origin.url, self.origin.url, self.temp_dir)
# Ensure to use remote URL retrieved by SvnRepo as origin URL might redirect
# and svnrdump does not handle URL redirection
self.svn_url = self.svnrepo.remote_url
max_rev = -1
if self.svnrepo.root_directory:
# When loading a sub-path of a repository from a dump file it has been
# observed it is less error-prone to dump the whole repository as some
# partial dumps fail to be loaded by svnadmin or svnrdump can end up
# with error
try:
self.svnrepo.info(self.svnrepo.repos_root_url)
except SubversionException:
# Repository root URL cannot be accessed by a svn client, try to
# load from a partial dump then
pass
else:
# A dump file for the whole repository can be produced, in that case
# we stop to dump revisions once the last one modifying the repository
# sub-path was dumped (revisions not modifying sub-path are then filtered
# out during the loading process).
max_rev = self.svnrepo.info(self.svn_url).last_changed_revision
self.svn_url = self.svnrepo.repos_root_url
# Then for stale repository, check if the last loaded revision in the archive
# is different from the last revision on the remote subversion server.
# Skip the dump of all revisions and the loading process if they are identical
......@@ -858,7 +917,9 @@ class SvnLoaderFromRemoteDump(SvnLoader):
# Then try to generate a dump file containing relevant svn revisions
# to load, an exception will be thrown if something wrong happened
dump_path, max_rev = self.dump_svn_revisions(self.svn_url, last_loaded_svn_rev)
dump_path, max_rev = self.dump_svn_revisions(
self.svn_url, max_rev, last_loaded_svn_rev
)
# Finally, mount the dump and load the repository
self.log.debug('Mounting dump file with "svnadmin load".')
......
# Copyright (C) 2016-2023 The Software Heritage developers
# Copyright (C) 2016-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Remote Access client to svn server.
"""
"""Remote Access client to svn server."""
from __future__ import annotations
......@@ -35,7 +33,6 @@ from subvertpy import SubversionException, properties
from subvertpy.ra import RemoteAccess
from swh.model import from_disk
from swh.model.from_disk import DiskBackedContent
from swh.model.model import Content, Directory, SkippedContent
if TYPE_CHECKING:
......@@ -196,6 +193,8 @@ class DirEditor:
"""
if path in self.directory:
if self.editor.debug:
logger.debug("Removing path %s", path)
entry_removed = self.directory[path]
del self.directory[path]
self.dir_states.pop(path, None)
......@@ -389,8 +388,7 @@ class DirEditor:
externals = self.dir_states[self.path].externals
for path in externals.keys():
self.remove_external_path(os.fsencode(path))
self.dir_states[self.path].externals = {}
self.dir_states.pop(self.path)
def delete_entry(self, path: str, revision: int) -> None:
"""Remove a path."""
......@@ -412,18 +410,20 @@ class DirEditor:
force=True,
)
if path_bytes not in self.editor.external_paths:
self.remove_child(path_bytes)
elif os.path.isdir(fullpath):
if os.path.isdir(fullpath):
# versioned and external paths can overlap so we need to iterate on
# all subpaths to check which ones to remove
for root, dirs, files in os.walk(fullpath):
# all subpaths to check which ones to remove, paths are iterated in
# a bottom-up manner to ensure all related dir states are removed
for root, dirs, files in os.walk(fullpath, topdown=False):
for p in chain(dirs, files):
full_repo_path = os.path.join(root, p)
repo_path = full_repo_path.replace(self.rootpath + b"/", b"")
if repo_path not in self.editor.external_paths:
self.remove_child(repo_path)
if path_bytes not in self.editor.external_paths:
self.remove_child(path_bytes)
def close(self):
"""Function called when we finish processing a repository.
......@@ -452,6 +452,11 @@ class DirEditor:
old_externals = prev_externals_set - externals_set
for path, _, _, _ in old_externals:
self.remove_external_path(os.fsencode(path))
if path in externals and externals[path]:
# case where two externals were previously targeting the same path
# and one was removed, export again the remaining one in case its
# content changed
self.process_external(path, externals[path][0], force=True)
else:
# some external paths might have been removed in the current replayed
# revision by a delete operation on an overlapping versioned path so we
......@@ -502,6 +507,7 @@ class DirEditor:
path: str,
external: ExternalDefinition,
remove_target_path: bool = True,
force: bool = False,
) -> None:
dest_path = os.fsencode(path)
dest_fullpath = os.path.join(self.path, dest_path)
......@@ -511,6 +517,7 @@ class DirEditor:
path in prev_externals
and external in prev_externals[path]
and dest_fullpath in self.directory
and not force
):
# external already exported, nothing to do
return
......@@ -525,13 +532,15 @@ class DirEditor:
"Exporting external %s%s%s to path %s",
external.url,
f" at revision {external.revision}" if external.revision else "",
f" and peg revision {external.peg_revision}"
if external.peg_revision
else "",
(
f" and peg revision {external.peg_revision}"
if external.peg_revision
else ""
),
dest_fullpath,
)
if external not in self.editor.externals_cache:
if external not in self.editor.externals_cache or force:
try:
# try to export external in a temporary path, destination path could
# be versioned and must be overridden only if the external URL is
......@@ -654,9 +663,9 @@ class DirEditor:
self.dir_states[self.path].externals_paths.update(external_paths)
for external_path in external_paths:
self.editor.external_paths[
self.editor.external_paths.add(
os.path.join(self.path, external_path)
] += 1
)
# ensure hash update for the directory with externals set
self.directory[self.path].update_hash(force=True)
......@@ -677,27 +686,29 @@ class DirEditor:
if self.editor.debug:
logger.debug("Removing external path %s", fullpath)
# decrement number of references for external path when we really remove it
# (when remove_subpaths is False, we just cleanup the external path before
# copying exported paths in it)
if force or (fullpath in self.editor.external_paths and remove_subpaths):
self.editor.external_paths[fullpath] -= 1
if (
fullpath in self.editor.external_paths
and self.editor.external_paths[fullpath] == 0
):
can_remove_external = True
subpath_split = fullpath.split(b"/")[:-1]
# check there is no overlapping external set in ancestor directories
# and mark current external not to be removed if it is the case
for i in reversed(range(1, len(subpath_split))):
subpath = b"/".join(subpath_split[0:i])
subdir_state = self.editor.dir_states.get(subpath)
if subdir_state and fullpath in {
os.path.join(subpath, ext_path)
for ext_path in subdir_state.externals_paths
}:
can_remove_external = False
break
if force or can_remove_external:
self.remove_child(fullpath)
self.editor.external_paths.pop(fullpath, None)
self.editor.external_paths.discard(fullpath)
self.editor.valid_externals.pop(fullpath, None)
for path in list(self.editor.external_paths):
if path.startswith(fullpath + b"/"):
self.editor.external_paths[path] -= 1
if self.editor.external_paths[path] == 0:
self.editor.external_paths.pop(path)
self.editor.external_paths.remove(path)
if remove_subpaths:
subpath_split = fullpath.split(b"/")[:-1]
for i in reversed(range(1, len(subpath_split) + 1)):
# delete external sub-directory only if it is not versioned
subpath = b"/".join(subpath_split[0:i])
......@@ -752,7 +763,7 @@ class Editor:
self.rootpath = rootpath
self.directory = directory
self.dir_states: Dict[bytes, DirState] = defaultdict(DirState)
self.external_paths: Dict[bytes, int] = defaultdict(int)
self.external_paths: Set[bytes] = set()
self.valid_externals: Dict[bytes, Tuple[str, bool]] = {}
self.dead_externals: Set[Tuple[str, Optional[int], Optional[int], bool]] = set()
self.externals_cache_dir = tempfile.mkdtemp(dir=temp_dir)
......@@ -843,7 +854,7 @@ class Replay:
for obj_node in self.directory.collect():
obj = obj_node.to_model() # type: ignore
obj_type = obj.object_type
if obj_type in (Content.object_type, DiskBackedContent.object_type):
if obj_type == Content.object_type:
contents.append(obj.with_data())
elif obj_type == SkippedContent.object_type:
skipped_contents.append(obj)
......
# Copyright (C) 2015-2024 The Software Heritage developers
# Copyright (C) 2015-2025 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -10,6 +10,7 @@ commit.
"""
import bisect
from contextlib import contextmanager
from datetime import datetime
import logging
import os
......@@ -23,6 +24,9 @@ from subvertpy.ra import (
Auth,
RemoteAccess,
get_simple_prompt_provider,
get_ssl_client_cert_file_provider,
get_ssl_client_cert_pw_file_provider,
get_ssl_server_trust_file_provider,
get_username_provider,
)
......@@ -40,6 +44,21 @@ DEFAULT_AUTHOR_MESSAGE = b""
logger = logging.getLogger(__name__)
@contextmanager
def ssh_askpass_anonymous():
    """Context manager to prevent blocking subversion checkout/export operation
    due to password prompt triggered by an external definition whose target URL
    starts with 'svn+ssh://<user>@'. The requested password is automatically set
    to 'anonymous' in that case.

    Yields:
        the temporary askpass shell script (deleted when the context exits).
    """
    # Remember the previous environment so the mutation does not leak past
    # the context: previously SSH_ASKPASS was left pointing at a temp file
    # that no longer exists once the context manager exits.
    saved_env = {
        var: os.environ.get(var) for var in ("SSH_ASKPASS_REQUIRE", "SSH_ASKPASS")
    }
    with tempfile.NamedTemporaryFile(mode="w") as askpass_script:
        askpass_script.write("#!/bin/sh\necho anonymous")
        askpass_script.flush()
        # the script must be executable for ssh to run it
        os.chmod(askpass_script.name, 0o700)
        os.environ["SSH_ASKPASS_REQUIRE"] = "force"
        os.environ["SSH_ASKPASS"] = askpass_script.name
        try:
            yield askpass_script
        finally:
            # restore the prior environment state
            for var, value in saved_env.items():
                if value is None:
                    os.environ.pop(var, None)
                else:
                    os.environ[var] = value
class SvnRepo:
"""Svn repository representation.
......@@ -105,6 +124,12 @@ class SvnRepo:
self.remote_url = remote_url.rstrip("/")
auth_providers += [
get_ssl_client_cert_file_provider(),
get_ssl_client_cert_pw_file_provider(),
get_ssl_server_trust_file_provider(),
]
self.auth = Auth(auth_providers)
# one client for update operation
self.client = client.Client(auth=self.auth)
......@@ -135,6 +160,13 @@ class SvnRepo:
# compute root directory path from the origin URL, required to
# properly load the sub-tree of a repository mounted from a dump file
repos_root_url = self.info(self.origin_url).repos_root_url
origin_url_parsed = urlparse(self.origin_url)
repos_root_url_parsed = urlparse(repos_root_url)
if origin_url_parsed.scheme != repos_root_url_parsed.scheme:
# update repos_root_url scheme in case of redirection
repos_root_url = urlunparse(
repos_root_url_parsed._replace(scheme=origin_url_parsed.scheme)
)
self.root_directory = self.origin_url.rstrip("/").replace(repos_root_url, "", 1)
# get root repository URL from the remote URL
self.repos_root_url = self.info(self.remote_url).repos_root_url
......@@ -305,16 +337,17 @@ class SvnRepo:
f"@{peg_rev}" if peg_rev else "",
to,
)
return self.client.export(
quote_svn_url(url),
to=to,
rev=rev,
peg_rev=peg_rev,
recurse=recurse,
ignore_externals=ignore_externals,
overwrite=overwrite,
ignore_keywords=ignore_keywords,
)
with ssh_askpass_anonymous():
return self.client.export(
quote_svn_url(url),
to=to,
rev=rev,
peg_rev=peg_rev,
recurse=recurse,
ignore_externals=ignore_externals,
overwrite=overwrite,
ignore_keywords=ignore_keywords,
)
@svn_retry()
def checkout(
......@@ -354,15 +387,16 @@ class SvnRepo:
f"@{peg_rev}" if peg_rev else "",
path,
)
return self.client.checkout(
quote_svn_url(url),
path=path,
rev=rev,
peg_rev=peg_rev,
recurse=recurse,
ignore_externals=ignore_externals,
allow_unver_obstructions=allow_unver_obstructions,
)
with ssh_askpass_anonymous():
return self.client.checkout(
quote_svn_url(url),
path=path,
rev=rev,
peg_rev=peg_rev,
recurse=recurse,
ignore_externals=ignore_externals,
allow_unver_obstructions=allow_unver_obstructions,
)
@svn_retry()
def propget(
......@@ -635,7 +669,7 @@ class SvnRepo:
)
def get_svn_repo(*args, **kwargs):
def get_svn_repo(*args, **kwargs) -> Optional[SvnRepo]:
"""Instantiate an SvnRepo class and trap SubversionException if any raises.
In case of connection error to the repository, its read access using anonymous
credentials is also attempted.
......@@ -676,3 +710,4 @@ def get_svn_repo(*args, **kwargs):
continue
else:
raise
return None
# Copyright (C) 2019-2023 The Software Heritage developers
# Copyright (C) 2019-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from contextlib import closing
import socket
import subprocess
import time
from typing import Any, Dict
import uuid
import pytest
from swh.loader.svn.loader import SvnLoader, SvnLoaderFromRemoteDump
from swh.loader.svn.svn_repo import SvnRepo
from swh.scheduler.model import Lister
from .utils import create_repo
......@@ -61,13 +63,31 @@ def repo_url(tmpdir_factory):
@pytest.fixture(autouse=True)
def svn_retry_sleep_mocker(mocker):
mocker.patch.object(SvnRepo.export.retry, "sleep")
mocker.patch.object(SvnRepo.checkout.retry, "sleep")
mocker.patch.object(SvnRepo.propget.retry, "sleep")
mocker.patch.object(SvnRepo.remote_access.retry, "sleep")
mocker.patch.object(SvnRepo.info.retry, "sleep")
mocker.patch.object(SvnRepo.commit_info.retry, "sleep")
def mock_sleep(mocker):
return mocker.patch("time.sleep")
def find_free_port():
    """Return a TCP port number currently free on all local interfaces.

    Note: the port is released before returning, so another process may
    grab it before the caller binds it (inherent race of this technique).
    """
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
        # SO_REUSEADDR must be set *before* bind to have any effect;
        # it lets the caller rebind the port even if it lingers in TIME_WAIT
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind(("", 0))
        # port 0 asks the kernel to pick an ephemeral free port
        return s.getsockname()[1]
# https://gist.github.com/butla/2d9a4c0f35ea47b7452156c96a4e7b12
def wait_for_port(port: int, host: str = "localhost", timeout: float = 5.0):
    """Block until a TCP connection to ``host:port`` succeeds.

    Polls every 10 ms; raises TimeoutError when the port is still not
    accepting connections after ``timeout`` seconds.
    """
    started_at = time.perf_counter()
    while True:
        try:
            probe = socket.create_connection((host, port), timeout=timeout)
        except OSError as ex:
            # port not accepting connections yet: back off briefly, then
            # give up once the overall deadline has passed
            time.sleep(0.01)
            if time.perf_counter() - started_at >= timeout:
                raise TimeoutError(
                    f"Waited too long for the port {port} on host {host} "
                    "to start accepting connections."
                ) from ex
        else:
            probe.close()
            break
@pytest.fixture
......@@ -76,8 +96,9 @@ def svnserve():
after test run"""
svnserve_proc = None
def run_svnserve(repo_root, port):
def run_svnserve(repo_root):
nonlocal svnserve_proc
port = find_free_port()
svnserve_proc = subprocess.Popen(
[
"svnserve",
......@@ -89,6 +110,8 @@ def svnserve():
repo_root,
]
)
wait_for_port(port)
return port
yield run_svnserve
......
# Copyright (C) 2023 The Software Heritage developers
# Copyright (C) 2023-2025 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from pathlib import Path
import pytest
from swh.loader.core.nar import Nar
from swh.loader.svn.directory import SvnExportLoader
......@@ -19,10 +22,11 @@ from swh.loader.tests import (
def compute_nar_hash_for_rev(repo_url: str, rev: int, hash_name: str = "sha256") -> str:
"""Compute the Nar hashes of the svn tree at the revision 'rev'."""
svn_repo = get_svn_repo(repo_url)
assert svn_repo is not None
_, export_dir = svn_repo.export_temporary(rev)
nar = Nar(hash_names=[hash_name])
nar.serialize(export_dir.decode())
nar.serialize(Path(export_dir.decode()))
return nar.hexdigest()[hash_name]
......@@ -94,6 +98,96 @@ def test_loader_svn_directory(swh_storage, datadir, tmp_path):
assert actual_result2 == {"status": "uneventful"}
@pytest.mark.parametrize(
    "use_custom_origin_url",
    [False, True],
    ids=["origin_url == svn_url", "origin_url != svn_url"],
)
def test_loader_svn_directory_sub_paths(
    swh_storage, datadir, tmp_path, use_custom_origin_url
):
    """Loading a subset of a svn tree with proper nar checksums should be eventful"""
    archive_name = "pkg-gourmet"
    repo_url = prepare_repository_from_archive(
        os.path.join(datadir, f"{archive_name}.tgz"), archive_name, tmp_path=tmp_path
    )

    svn_revision = 5
    checksums = {
        "sha256": "21e9553da2f8ae27d6b9ae87f509b0233fc6edbabc1099c31b90e1dec2cbb618"
    }

    # when a custom origin URL is used, it embeds the expected nar hash and
    # the actual subversion URL is passed separately through svn_url
    if use_custom_origin_url:
        origin_url = f"{repo_url}?nar=sha256-{checksums['sha256']}"
        svn_url = repo_url
    else:
        origin_url = repo_url
        svn_url = None

    # shared keyword arguments for both loader runs below
    loader_kwargs = dict(
        svn_url=svn_url,
        ref=svn_revision,
        svn_paths=["gourmet/trunk/debian/gourmet.1", "gourmet/trunk/debian/patches"],
        checksum_layout="nar",
        checksums=checksums,
    )

    loader = SvnExportLoader(swh_storage, origin_url, **loader_kwargs)
    assert loader.load() == {"status": "eventful"}

    actual_visit = assert_last_visit_matches(
        swh_storage,
        origin_url,
        status="full",
        type="svn-export",
    )

    snapshot = swh_storage.snapshot_get(actual_visit.snapshot)
    assert snapshot is not None
    assert set(snapshot["branches"].keys()) == {
        b"HEAD",
        f"rev_{svn_revision}".encode(),
    }

    assert get_stats(swh_storage) == {
        "content": 3,
        "directory": 5,
        "origin": 1,
        "origin_visit": 1,
        "release": 0,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }

    # Ensure the extids got stored as well
    extids = fetch_extids_from_checksums(
        loader.storage,
        checksum_layout="nar",
        checksums=checksums,
        extid_version=loader.extid_version,
    )
    assert extids[0].extid.hex() == checksums["sha256"]

    # Another run should be uneventful
    loader2 = SvnExportLoader(swh_storage, origin_url, **loader_kwargs)
    assert loader2.load() == {"status": "uneventful"}
def test_loader_svn_directory_hash_mismatch(swh_storage, datadir, tmp_path):
"""Loading a svn tree with faulty checksums should fail"""
archive_name = "pkg-gourmet"
......@@ -115,7 +209,7 @@ def test_loader_svn_directory_hash_mismatch(swh_storage, datadir, tmp_path):
actual_result = loader.load()
# Ingestion fails because the checks failed
assert actual_result == {"status": "failed"}
assert actual_result["status"] == "failed"
assert get_stats(swh_storage) == {
"content": 0,
"directory": 0,
......
# Copyright (C) 2022-2023 The Software Heritage developers
# Copyright (C) 2022-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -1163,6 +1163,50 @@ def test_loader_with_recursive_external(
assert not loader.svnrepo.has_recursive_externals
def test_loader_with_not_recursive_external(
    svn_loader_cls, swh_storage, repo_url, tmp_path
):
    """An svn:externals definition whose target lives below the directory it
    is set on (but is not that directory itself) must not be flagged as a
    recursive external by the loader."""
    # revision 1: a trunk/src directory holding one shell script
    add_commit(
        repo_url,
        "Add trunk/src dir and a file",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/src/bar.sh",
                data=b"#!/bin/bash\necho bar",
            )
        ],
    )

    # revision 2: trunk/ gets an external targeting its own src sub-directory
    add_commit(
        repo_url,
        "Set externals src on trunk/ dir targeting same directory",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
                properties={"svn:externals": (f"{repo_url}/trunk/src src")},
            ),
        ],
    )

    loader = svn_loader_cls(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
    )
    load_status = loader.load()
    assert load_status == {"status": "eventful"}

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)

    # the external points below trunk/, not at trunk/ itself
    assert not loader.svnrepo.has_recursive_externals
def test_loader_externals_with_same_target(
svn_loader_cls, swh_storage, repo_url, external_repo_url, tmp_path
):
......@@ -2424,3 +2468,250 @@ def test_loader_fix_external_removal_edge_case(
type="svn",
)
check_snapshot(loader.snapshot, loader.storage)
def test_loader_externals_same_target_path(
    svn_loader_cls, swh_storage, repo_url, external_repo_url, tmp_path
):
    """Two local externals first declared with the same target path, then
    fixed in a later revision, must be ingested without error."""
    # revision 1: two plain files under common/
    add_commit(
        repo_url,
        "Add common files",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="common/foo",
                data=b"foo",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="common/bar",
                data=b"bar",
            ),
        ],
    )

    # revision 2: both externals erroneously map to the same "foo" path
    add_commit(
        repo_url,
        "Add local externals targeting the same path",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
                properties={"svn:externals": "../common/foo foo\n../common/bar foo\n"},
            ),
        ],
    )

    # revision 3: the second external now targets its own "bar" path
    add_commit(
        repo_url,
        "Fix path of bar external",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
                properties={"svn:externals": "../common/foo foo\n../common/bar bar\n"},
            ),
        ],
    )

    loader = svn_loader_cls(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
    )
    load_status = loader.load()
    assert load_status == {"status": "eventful"}

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_loader_svn_externals_replace(
    svn_loader_cls, swh_storage, repo_url, external_repo_url, tmp_path
):
    """An external replaced to target another directory, then replaced back
    to its original target, must be ingested without error."""
    # first commit on external
    add_commit(
        external_repo_url,
        "Create some directories and files in an external repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="code/hello/hello-world",
                properties={"svn:executable": "*"},
                data=b"#!/bin/bash\necho Hello World !",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="code/foo/foo.sh",
                properties={"svn:executable": "*"},
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )

    # revision 1 on the loaded repository: bin external -> code/hello
    add_commit(
        repo_url,
        (
            "Set trunk/externals/bin external targeting code/hello directory "
            "in external repository"
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/externals/",
                properties={
                    "svn:externals": (
                        f"{svn_urljoin(external_repo_url, 'code/hello')} bin\n"
                    )
                },
            ),
        ],
    )

    # revision 2: same bin external replaced to target code/foo
    add_commit(
        repo_url,
        (
            "Replace trunk/externals/bin external to target code/foo directory "
            "in external repository"
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/externals/",
                properties={
                    "svn:externals": (
                        f"{svn_urljoin(external_repo_url, 'code/foo')} bin\n"
                    )
                },
            ),
        ],
    )

    # revision 3: bin external switched back to code/hello
    add_commit(
        repo_url,
        (
            "Replace trunk/externals/bin external to target code/hello directory again "
            "in external repository"
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/externals/",
                properties={
                    "svn:externals": (
                        f"{svn_urljoin(external_repo_url, 'code/hello')} bin\n"
                    )
                },
            ),
        ],
    )

    loader = svn_loader_cls(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
    )
    load_status = loader.load()
    assert load_status == {"status": "eventful"}

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
def test_loader_ensure_dir_state_cleanup_after_external_removal(
    svn_loader_cls, swh_storage, repo_url, external_repo_url, tmp_path
):
    """Successively setting and unsetting externals at different depths must
    leave the loader's directory state clean after each removal."""
    # populate the external repository
    add_commit(
        external_repo_url,
        "Create some directories and files in an external repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="code/hello/hello-world",
                properties={"svn:executable": "*"},
                data=b"#!/bin/bash\necho Hello World !",
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="code/foo/foo.sh",
                properties={"svn:executable": "*"},
                data=b"#!/bin/bash\necho foo",
            ),
        ],
    )

    # revision 1: external on trunk/ -> code
    add_commit(
        repo_url,
        "Set trunk/code external targeting code directory in external repository",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
                properties={
                    "svn:externals": (
                        f"{svn_urljoin(external_repo_url, 'code')} code\n"
                    )
                },
            ),
        ],
    )

    # revision 2: drop the trunk/ external, add one deeper at trunk/code/
    add_commit(
        repo_url,
        (
            "Unset external on trunk and set trunk/code/hello external "
            "targeting code/hello directory in external repository"
        ),
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/",
                properties={"svn:externals": None},
            ),
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/code/",
                properties={
                    "svn:externals": (
                        f"{svn_urljoin(external_repo_url, 'code/hello')} hello\n"
                    )
                },
            ),
        ],
    )

    # revision 3: no externals left at all
    add_commit(
        repo_url,
        "Unset external on trunk/code",
        [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/code/",
                properties={"svn:externals": None},
            ),
        ],
    )

    loader = svn_loader_cls(
        swh_storage,
        repo_url,
        temp_directory=tmp_path,
        check_revision=1,
        debug=True,
    )
    load_status = loader.load()
    assert load_status == {"status": "eventful"}

    assert_last_visit_matches(
        loader.storage,
        repo_url,
        status="full",
        type="svn",
    )
    check_snapshot(loader.snapshot, loader.storage)
# Copyright (C) 2016-2023 The Software Heritage developers
# Copyright (C) 2016-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -29,7 +29,7 @@ from swh.loader.tests import (
)
from swh.model.from_disk import DentryPerms, Directory
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Snapshot, SnapshotBranch, TargetType
from swh.model.model import Snapshot, SnapshotBranch, SnapshotTargetType
from .utils import CommitChange, CommitChangeType, add_commit
......@@ -38,7 +38,7 @@ GOURMET_SNAPSHOT = Snapshot(
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("4876cb10aec6f708f7466dddf547567b65f6c39c"),
target_type=TargetType.REVISION,
target_type=SnapshotTargetType.REVISION,
)
},
)
......@@ -48,7 +48,7 @@ GOURMET_UPDATES_SNAPSHOT = Snapshot(
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("171dc35522bfd17dda4e90a542a0377fb2fc707a"),
target_type=TargetType.REVISION,
target_type=SnapshotTargetType.REVISION,
)
},
)
......@@ -112,7 +112,7 @@ def test_loader_svn_failures(svn_loader_cls, swh_storage, tmp_path, exception, m
existing_repo_url = "existing-repo-url"
loader = svn_loader_cls(swh_storage, existing_repo_url, temp_directory=tmp_path)
assert loader.load() == {"status": "failed"}
assert loader.load()["status"] == "failed"
assert_last_visit_matches(
swh_storage,
......@@ -525,7 +525,7 @@ def test_loader_svn_visit_with_eol_style(
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("7da4975c363101b819756d33459f30a866d01b1b"),
target_type=TargetType.REVISION,
target_type=SnapshotTargetType.REVISION,
)
},
)
......@@ -567,7 +567,7 @@ def test_loader_svn_visit_with_mixed_crlf_lf(
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("9c6962eeb9164a636c374be700672355e34a98a7"),
target_type=TargetType.REVISION,
target_type=SnapshotTargetType.REVISION,
)
},
)
......@@ -610,7 +610,7 @@ def test_loader_svn_with_symlink(svn_loader_cls, swh_storage, datadir, tmp_path)
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("3f43af2578fccf18b0d4198e48563da7929dc608"),
target_type=TargetType.REVISION,
target_type=SnapshotTargetType.REVISION,
)
},
)
......@@ -651,7 +651,7 @@ def test_loader_svn_with_wrong_symlinks(svn_loader_cls, swh_storage, datadir, tm
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("cf30d3bb9d5967d0a2bbeacc405f10a5dd9b138a"),
target_type=TargetType.REVISION,
target_type=SnapshotTargetType.REVISION,
)
},
)
......@@ -910,7 +910,7 @@ def test_loader_user_defined_svn_properties(
branches={
b"HEAD": SnapshotBranch(
target=hash_to_bytes("604a17dbb15e8d7ecb3e9f3768d09bf493667a93"),
target_type=TargetType.REVISION,
target_type=SnapshotTargetType.REVISION,
)
},
)
......@@ -1320,7 +1320,7 @@ def test_loader_first_revision_is_not_number_one(
loader = svn_loader_cls(swh_storage, repo_url, temp_directory=tmp_path)
# post loading will detect an issue and make a partial visit with a snapshot
assert loader.load() == {"status": "failed"}
assert loader.load()["status"] == "failed"
assert_last_visit_matches(
loader.storage,
......@@ -1439,7 +1439,7 @@ def test_loader_last_revision_divergence(
loader = SvnLoaderRevisionDivergence(swh_storage, repo_url, temp_directory=tmp_path)
assert loader.load() == {"status": "failed"}
assert loader.load()["status"] == "failed"
assert_last_visit_matches(
loader.storage,
......@@ -1965,7 +1965,7 @@ def test_loader_with_subprojects(
for i in range(1, 4):
# load each project in the repository separately and check behavior
# is the same if origin URL has a trailing slash or not
origin_url = f"{repo_url}/projects/project{i}{'/' if i%2 else ''}"
origin_url = f"{repo_url}/projects/project{i}{'/' if i % 2 else ''}"
loader_params = {
"storage": swh_storage,
......@@ -2000,7 +2000,7 @@ def test_loader_with_subprojects(
assert len(root_dir) == 1 and root_dir[0]["type"] == "file"
if svn_loader_cls == SvnLoaderFromRemoteDump:
dump_revisions.assert_called_once_with(origin_url.rstrip("/"), -1)
dump_revisions.assert_called_once_with(repo_url, i, -1)
if svn_loader_cls == SvnLoaderFromDumpArchive:
loader_params["archive_path"] = _dump_project(tmp_path, origin_url)
......@@ -2163,6 +2163,7 @@ def test_loader_svn_from_remote_dump_url_redirect(swh_storage, tmp_path, mocker)
# check redirection URL has been used to dump repository
assert loader.dump_svn_revisions.call_args_list[0][0][0] == repo_redirect_url
assert loader.svnrepo.root_directory == ""
@pytest.mark.parametrize(
......@@ -2189,9 +2190,6 @@ def test_loader_basic_authentication_required(
repo_root = os.path.dirname(repo_path)
repo_name = os.path.basename(repo_path)
username, password = credentials
port = 12000
repo_url_no_auth = f"svn://localhost:{port}/{repo_name}"
repo_url = f"svn://{username}:{password}@localhost:{port}/{repo_name}"
# disable anonymous access and require authentication on test repo
with open(os.path.join(repo_path, "conf", "svnserve.conf"), "w") as svnserve_conf:
......@@ -2218,7 +2216,9 @@ def test_loader_basic_authentication_required(
passwd.write(f"[users]\n{username} = {password}")
# execute svnserve
svnserve(repo_root, port)
port = svnserve(repo_root)
repo_url_no_auth = f"svn://localhost:{port}/{repo_name}"
repo_url = f"svn://{username}:{password}@localhost:{port}/{repo_name}"
# check loading failed with no authentication in URL apart for anonymous credentials
loader = svn_loader_cls(swh_storage, repo_url_no_auth, temp_directory=tmp_path)
......
# Copyright (C) 2022-2023 The Software Heritage developers
# Copyright (C) 2022-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -113,14 +113,12 @@ RETRYABLE_EXCEPTIONS = [
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_export_retry_success(
mocker, tmp_path, sample_repo_url, exception_to_retry
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000
)
mock_sleep = mocker.patch.object(svnrepo.export.retry, "sleep")
nb_failed_calls = 2
svnrepo.client = SVNClientWrapper(
svnrepo.client, exception_to_retry, nb_failed_calls
......@@ -135,14 +133,12 @@ def test_svn_export_retry_success(
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_export_retry_failure(
mocker, tmp_path, sample_repo_url, exception_to_retry
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000
)
mock_sleep = mocker.patch.object(svnrepo.export.retry, "sleep")
nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS
svnrepo.client = SVNClientWrapper(
svnrepo.client, exception_to_retry, nb_failed_calls
......@@ -159,14 +155,12 @@ def test_svn_export_retry_failure(
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_checkout_retry_success(
mocker, tmp_path, sample_repo_url, exception_to_retry
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000
)
mock_sleep = mocker.patch.object(svnrepo.checkout.retry, "sleep")
nb_failed_calls = 2
svnrepo.client = SVNClientWrapper(
svnrepo.client, exception_to_retry, nb_failed_calls
......@@ -181,14 +175,12 @@ def test_svn_checkout_retry_success(
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_checkout_retry_failure(
mocker, tmp_path, sample_repo_url, exception_to_retry
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000
)
mock_sleep = mocker.patch.object(svnrepo.checkout.retry, "sleep")
nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS
svnrepo.client = SVNClientWrapper(
svnrepo.client, exception_to_retry, nb_failed_calls
......@@ -205,7 +197,7 @@ def test_svn_checkout_retry_failure(
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_propget_retry_success(
mocker, tmp_path, sample_repo_with_externals_url, exception_to_retry
mocker, tmp_path, sample_repo_with_externals_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_with_externals_url,
......@@ -222,8 +214,6 @@ def test_svn_propget_retry_success(
ignore_externals=True,
)
mock_sleep = mocker.patch.object(svnrepo.propget.retry, "sleep")
nb_failed_calls = 2
svnrepo.client = SVNClientWrapper(
svnrepo.client, exception_to_retry, nb_failed_calls
......@@ -238,7 +228,7 @@ def test_svn_propget_retry_success(
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_propget_retry_failure(
mocker, tmp_path, sample_repo_with_externals_url, exception_to_retry
mocker, tmp_path, sample_repo_with_externals_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_with_externals_url,
......@@ -255,8 +245,6 @@ def test_svn_propget_retry_failure(
ignore_externals=True,
)
mock_sleep = mocker.patch.object(svnrepo.propget.retry, "sleep")
nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS
svnrepo.client = SVNClientWrapper(
svnrepo.client, exception_to_retry, nb_failed_calls
......@@ -270,7 +258,7 @@ def test_svn_propget_retry_failure(
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_remote_access_retry_success(
mocker, tmp_path, sample_repo_url, exception_to_retry
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
nb_failed_calls = 2
mock_ra = mocker.patch("swh.loader.svn.svn_repo.RemoteAccess")
......@@ -282,8 +270,6 @@ def test_remote_access_retry_success(
+ [remote_access]
)
mock_sleep = mocker.patch.object(SvnRepo.remote_access.retry, "sleep")
SvnRepo(
sample_repo_url,
sample_repo_url,
......@@ -296,7 +282,7 @@ def test_remote_access_retry_success(
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_remote_access_retry_failure(
mocker, tmp_path, sample_repo_url, exception_to_retry
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS
mock_ra = mocker.patch("swh.loader.svn.svn_repo.RemoteAccess")
......@@ -308,8 +294,6 @@ def test_remote_access_retry_failure(
+ [remote_access]
)
mock_sleep = mocker.patch.object(SvnRepo.remote_access.retry, "sleep")
with pytest.raises(type(exception_to_retry)):
SvnRepo(
sample_repo_url,
......@@ -322,13 +306,13 @@ def test_remote_access_retry_failure(
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_info_retry_success(mocker, tmp_path, sample_repo_url, exception_to_retry):
def test_svn_info_retry_success(
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000
)
mock_sleep = mocker.patch.object(svnrepo.info.retry, "sleep")
nb_failed_calls = 2
svnrepo.client = SVNClientWrapper(
svnrepo.client, exception_to_retry, nb_failed_calls
......@@ -341,13 +325,13 @@ def test_svn_info_retry_success(mocker, tmp_path, sample_repo_url, exception_to_
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_info_retry_failure(mocker, tmp_path, sample_repo_url, exception_to_retry):
def test_svn_info_retry_failure(
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000
)
mock_sleep = mocker.patch.object(svnrepo.info.retry, "sleep")
nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS
svnrepo.client = SVNClientWrapper(
svnrepo.client, exception_to_retry, nb_failed_calls
......@@ -361,14 +345,12 @@ def test_svn_info_retry_failure(mocker, tmp_path, sample_repo_url, exception_to_
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_commit_info_retry_success(
mocker, tmp_path, sample_repo_url, exception_to_retry
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000
)
mock_sleep = mocker.patch.object(svnrepo.commit_info.retry, "sleep")
nb_failed_calls = 2
remote_access = SVNRemoteAccessWrapper(
svnrepo.remote_access(), exception_to_retry, nb_failed_calls
......@@ -383,14 +365,12 @@ def test_svn_commit_info_retry_success(
@pytest.mark.parametrize("exception_to_retry", RETRYABLE_EXCEPTIONS)
def test_svn_commit_info_retry_failure(
mocker, tmp_path, sample_repo_url, exception_to_retry
mocker, tmp_path, sample_repo_url, exception_to_retry, mock_sleep
):
svnrepo = SvnRepo(
sample_repo_url, sample_repo_url, tmp_path, max_content_length=100000
)
mock_sleep = mocker.patch.object(svnrepo.commit_info.retry, "sleep")
nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS
remote_access = SVNRemoteAccessWrapper(
svnrepo.remote_access(), exception_to_retry, nb_failed_calls
......