Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • anlambert/swh-loader-svn
  • lunar/swh-loader-svn
  • ardumont/swh-loader-svn
  • swh/devel/swh-loader-svn
  • douardda/swh-loader-svn
  • marmoute/swh-loader-svn
6 results
Show changes
Commits on Source (18)
......@@ -30,7 +30,7 @@ repos:
types: [python]
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
rev: 5.11.5
hooks:
- id: isort
......
......@@ -12,4 +12,11 @@ Reference Documentation
.. toctree::
:maxdepth: 2
/apidoc/swh.loader.svn
.. only:: standalone_package_doc
Indices and tables
------------------
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
# Copyright (C) 2015-2022 The Software Heritage developers
# Copyright (C) 2015-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -22,7 +22,7 @@ from subvertpy import SubversionException
from swh.loader.core.loader import BaseLoader
from swh.loader.core.utils import clean_dangling_folders
from swh.loader.exception import NotFound
from swh.loader.svn.svn import SvnRepo
from swh.loader.svn.svn_repo import SvnRepo
from swh.model import from_disk, hashutil
from swh.model.model import (
Content,
......@@ -223,6 +223,9 @@ Local repository not cleaned up for investigation: %s""",
def check_history_not_altered(self, revision_start: int, swh_rev: Revision) -> bool:
"""Given a svn repository, check if the history was modified in between visits."""
self.log.debug("Checking if history of repository got altered since last visit")
revision_id = swh_rev.id
parents = swh_rev.parents
......@@ -458,12 +461,14 @@ Local repository not cleaned up for investigation: %s""",
raise
def prepare(self):
if self.incremental:
latest_snapshot_revision = self._latest_snapshot_revision(self.origin.url)
if latest_snapshot_revision:
self.latest_snapshot, self.latest_revision = latest_snapshot_revision
self._snapshot = self.latest_snapshot
latest_snapshot_revision = self._latest_snapshot_revision(self.origin.url)
if latest_snapshot_revision:
self.latest_snapshot, self.latest_revision = latest_snapshot_revision
self._snapshot = self.latest_snapshot
if self.incremental:
self._last_revision = self.latest_revision
else:
self.latest_revision = None
local_dirname = self._create_tmp_dir(self.temp_directory)
......@@ -548,6 +553,12 @@ Local repository not cleaned up for investigation: %s""",
)
self.flush()
self.loaded_snapshot_id = self.snapshot.id
if (
self.latest_snapshot
and self.latest_snapshot.id == self.loaded_snapshot_id
):
# no new objects to archive found during the visit
self._load_status = "uneventful"
# reset internal state for next iteration
self._revisions = []
......@@ -845,6 +856,7 @@ class SvnLoaderFromRemoteDump(SvnLoader):
self.temp_dir,
self.max_content_size,
debug=self.debug,
from_dump=True,
)
# Ensure to use remote URL retrieved by SvnRepo as origin URL might redirect
......
# Copyright (C) 2016-2022 The Software Heritage developers
# Copyright (C) 2016-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -42,7 +42,7 @@ from swh.model.from_disk import DiskBackedContent
from swh.model.model import Content, Directory, SkippedContent
if TYPE_CHECKING:
from swh.loader.svn.svn import SvnRepo
from swh.loader.svn.svn_repo import SvnRepo
from swh.loader.svn.utils import (
is_recursive_external,
......@@ -134,7 +134,7 @@ class FileEditor:
self.directory[self.path] = from_disk.Content.from_file(path=self.fullpath)
ExternalDefinition = Tuple[str, Optional[int], bool]
ExternalDefinition = Tuple[str, Optional[int], Optional[int], bool]
@dataclass
......@@ -349,12 +349,13 @@ class DirEditor:
path,
external_url,
revision,
peg_revision,
relative_url,
) = parse_external_definition(
external, os.fsdecode(self.path), self.svnrepo.origin_url
)
self.externals[path].append(
(external_url, revision, relative_url)
(external_url, revision, peg_revision, relative_url)
)
except ValueError:
logger.debug(
......@@ -424,17 +425,17 @@ class DirEditor:
# associated paths
externals = self.externals
prev_externals_set = {
(path, url, rev)
(path, url, rev, peg_rev)
for path in prev_externals.keys()
for (url, rev, _) in prev_externals[path]
for (url, rev, peg_rev, _) in prev_externals[path]
}
externals_set = {
(path, url, rev)
(path, url, rev, peg_rev)
for path in externals.keys()
for (url, rev, _) in externals[path]
for (url, rev, peg_rev, _) in externals[path]
}
old_externals = prev_externals_set - externals_set
for path, _, _ in old_externals:
for path, _, _, _ in old_externals:
self.remove_external_path(os.fsencode(path))
else:
# some external paths might have been removed in the current replayed
......@@ -445,11 +446,12 @@ class DirEditor:
# For each external, try to export it in reconstructed filesystem
for path, externals_def in externals.items():
for i, external in enumerate(externals_def):
external_url, revision, relative_url = external
external_url, revision, peg_revision, relative_url = external
self.process_external(
path,
external_url,
revision,
peg_revision,
relative_url,
remove_target_path=i == 0,
)
......@@ -474,7 +476,7 @@ class DirEditor:
)
for path, dir_state in self.dir_states.items()
for external_path in dir_state.externals.keys()
for (external_url, _, _) in dir_state.externals[external_path]
for (external_url, _, _, _) in dir_state.externals[external_path]
)
if self.svnrepo.has_recursive_externals:
# If the repository has recursive externals, we stop processing
......@@ -489,10 +491,11 @@ class DirEditor:
path: str,
external_url: str,
revision: Optional[int],
peg_revision: Optional[int],
relative_url: bool,
remove_target_path: bool = True,
) -> None:
external = (external_url, revision, relative_url)
external = (external_url, revision, peg_revision, relative_url)
dest_path = os.fsencode(path)
dest_fullpath = os.path.join(self.path, dest_path)
prev_externals = self.dir_states[self.path].externals
......@@ -511,9 +514,10 @@ class DirEditor:
return
logger.debug(
"Exporting external %s%s to path %s",
"Exporting external %s%s%s to path %s",
external_url,
f"@{revision}" if revision else "",
f" at revision {revision}" if revision else "",
f" and peg revision {peg_revision}" if peg_revision else "",
dest_fullpath,
)
......@@ -528,7 +532,11 @@ class DirEditor:
)
temp_path = os.path.join(temp_dir, dest_path)
os.makedirs(b"/".join(temp_path.split(b"/")[:-1]), exist_ok=True)
if external_url not in self.editor.dead_externals:
if (
external_url,
revision,
peg_revision,
) not in self.editor.dead_externals:
url = external_url.rstrip("/")
origin_url = self.svnrepo.origin_url.rstrip("/")
if (
......@@ -539,7 +547,8 @@ class DirEditor:
self.svnrepo.export(
url,
to=temp_path,
peg_rev=revision,
rev=revision,
peg_rev=peg_revision,
ignore_keywords=True,
)
self.editor.externals_cache[external] = temp_path
......@@ -547,7 +556,7 @@ class DirEditor:
except SubversionException as se:
# external no longer available (404)
logger.debug(se)
self.editor.dead_externals.add(external_url)
self.editor.dead_externals.add((external_url, revision, peg_revision))
else:
temp_path = self.editor.externals_cache[external]
......@@ -679,7 +688,7 @@ class DirEditor:
# delete external sub-directory only if it is not versioned
subpath = b"/".join(subpath_split[0:i])
try:
self.svnrepo.client.info(
self.svnrepo.info(
svn_urljoin(self.svnrepo.remote_url, os.fsdecode(subpath)),
peg_revision=self.editor.revnum,
revision=self.editor.revnum,
......@@ -693,11 +702,12 @@ class DirEditor:
# externals can overlap with versioned files so we must restore
# them after removing the path above
dest_path = os.path.join(self.rootpath, fullpath)
self.svnrepo.client.export(
self.svnrepo.export(
svn_urljoin(self.svnrepo.remote_url, os.fsdecode(fullpath)),
to=dest_path,
peg_rev=self.editor.revnum,
ignore_keywords=True,
remove_dest_path=False,
)
if os.path.isfile(dest_path) or os.path.islink(dest_path):
self.directory[fullpath] = from_disk.Content.from_file(path=dest_path)
......@@ -729,7 +739,7 @@ class Editor:
self.dir_states: Dict[bytes, DirState] = defaultdict(DirState)
self.external_paths: Dict[bytes, int] = defaultdict(int)
self.valid_externals: Dict[bytes, Tuple[str, bool]] = {}
self.dead_externals: Set[str] = set()
self.dead_externals: Set[Tuple[str, Optional[int], Optional[int]]] = set()
self.externals_cache_dir = tempfile.mkdtemp(dir=temp_dir)
self.externals_cache: Dict[ExternalDefinition, bytes] = {}
self.svnrepo = svnrepo
......
# Copyright (C) 2015-2022 The Software Heritage developers
# Copyright (C) 2015-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -8,11 +8,14 @@ representations including the hash tree/content computations per svn
commit.
"""
import bisect
from datetime import datetime
import logging
import os
import shutil
import tempfile
from typing import Dict, Iterator, List, Optional, Tuple, Union
from typing import Dict, Iterator, List, Optional, Sequence, Tuple
from urllib.parse import quote, urlparse, urlunparse
from subvertpy import SubversionException, client, properties, wc
......@@ -24,20 +27,14 @@ from subvertpy.ra import (
)
from swh.model.from_disk import Directory as DirectoryFromDisk
from swh.model.model import (
Content,
Directory,
Person,
SkippedContent,
TimestampWithTimezone,
)
from swh.model.model import Content, Directory, SkippedContent
from . import converters, replay
from .svn_retry import svn_retry
from .utils import is_recursive_external, parse_external_definition
# When log message contains empty data
DEFAULT_AUTHOR_MESSAGE = ""
DEFAULT_AUTHOR_MESSAGE = b""
logger = logging.getLogger(__name__)
......@@ -59,12 +56,21 @@ class SvnRepo:
def __init__(
self,
remote_url: str,
origin_url: str,
local_dirname: str,
max_content_length: int,
origin_url: Optional[str] = None,
local_dirname: Optional[str] = None,
max_content_length: int = 100000,
from_dump: bool = False,
debug: bool = False,
):
if origin_url is None:
origin_url = remote_url
self.manage_directory = False
if local_dirname is None:
local_dirname = tempfile.mkdtemp()
self.manage_directory = True
self.local_dirname = local_dirname
self.origin_url = origin_url
self.from_dump = from_dump
......@@ -100,29 +106,26 @@ class SvnRepo:
self.remote_url = remote_url.rstrip("/")
auth = Auth(auth_providers)
self.auth = Auth(auth_providers)
# one client for update operation
self.client = client.Client(auth=auth)
self.client = client.Client(auth=self.auth)
if not self.remote_url.startswith("file://"):
# use redirection URL if any for remote operations
self.remote_url = self.info(self.remote_url).url
# one connection for log iteration
self.conn_log = self.remote_access(auth)
# another for replay
self.conn = self.remote_access(auth)
self.remote_access_url = self.remote_url
if not self.from_dump:
self.remote_url = self.info(self.remote_url).repos_root_url
self.local_dirname = local_dirname
local_name = os.path.basename(self.remote_url)
self.local_url = os.path.join(self.local_dirname, local_name).encode("utf-8")
self.uuid = self.conn.get_uuid().encode("utf-8")
conn = self.remote_access()
self.uuid = conn.get_uuid().encode("utf-8")
self.swhreplay = replay.Replay(
conn=self.conn,
conn=conn,
rootpath=self.local_url,
svnrepo=self,
temp_dir=local_dirname,
......@@ -135,8 +138,15 @@ class SvnRepo:
# compute root directory path from the remote repository URL, required to
# properly load the sub-tree of a repository mounted from a dump file
repos_root_url = self.info(self.origin_url).repos_root_url
self.root_directory = self.origin_url.rstrip("/").replace(repos_root_url, "", 1)
self.repos_root_url = self.info(self.origin_url).repos_root_url
self.root_directory = self.origin_url.rstrip("/").replace(
self.repos_root_url, "", 1
)
def __del__(self):
    """Remove the working directory when it was created by the constructor.

    Only directories allocated by the constructor itself (flagged through
    ``manage_directory``) are cleaned up; a directory supplied by the caller
    is left untouched.
    """
    # A finalizer may run on an instance whose constructor was bypassed or
    # did not complete, so do not assume the attribute exists.
    if getattr(self, "manage_directory", False):
        self.clean_fs()
def __str__(self):
return str(
......@@ -150,65 +160,24 @@ class SvnRepo:
def head_revision(self) -> int:
"""Retrieve current head revision."""
return self.conn.get_latest_revnum()
return self.remote_access().get_latest_revnum()
def initial_revision(self) -> int:
"""Retrieve the initial revision from which the remote url appeared."""
return 1
def convert_commit_message(self, msg: Union[str, bytes]) -> bytes:
"""Simply encode the commit message.
def _revision_data(self, log_entry: Tuple) -> Dict:
changed_paths, rev, revprops, _ = log_entry
Args:
msg: the commit message to convert.
Returns:
The transformed message as bytes.
"""
if isinstance(msg, bytes):
return msg
return msg.encode("utf-8")
def convert_commit_date(self, date: bytes) -> TimestampWithTimezone:
"""Convert the message commit date into a timestamp in swh format.
The precision is kept.
Args:
date: the commit date to convert.
Returns:
The transformed date.
"""
return converters.svn_date_to_swh_date(date)
def convert_commit_author(self, author: Optional[bytes]) -> Person:
"""Convert the commit author into an swh person.
Args:
author: the commit author to convert.
Returns:
Person as model object
"""
return converters.svn_author_to_swh_person(author)
def __to_entry(self, log_entry: Tuple) -> Dict:
changed_paths, rev, revprops, has_children = log_entry
author_date = self.convert_commit_date(
author_date = converters.svn_date_to_swh_date(
revprops.get(properties.PROP_REVISION_DATE)
)
author = self.convert_commit_author(
author = converters.svn_author_to_swh_person(
revprops.get(properties.PROP_REVISION_AUTHOR)
)
message = self.convert_commit_message(
revprops.get(properties.PROP_REVISION_LOG, DEFAULT_AUTHOR_MESSAGE)
)
message = revprops.get(properties.PROP_REVISION_LOG, DEFAULT_AUTHOR_MESSAGE)
has_changes = (
not self.from_dump
......@@ -228,36 +197,38 @@ class SvnRepo:
"changed_paths": changed_paths,
}
def logs(self, revision_start: int, revision_end: int) -> Iterator[Dict]:
"""Stream svn logs between revision_start and revision_end by chunks of
block_size logs.
def logs(
self,
revision_start: int,
revision_end: int,
) -> Iterator[Dict]:
"""Stream svn logs between revision_start and revision_end.
Yields revision and associated revision information between the
revision start and revision_end.
Yields revision information between revision_start and revision_end.
Args:
revision_start: the svn revision starting bound
revision_end: the svn revision ending bound
Yields:
tuple: tuple of revisions and logs:
dictionaries of revision data with the following keys:
- revisions: list of revisions in order
- logs: Dictionary with key revision number and value the log
entry. The log entry is a dictionary with the following keys:
- author_date: date of the commit
- author_name: name of the author
- message: commit message
- rev: revision number
- author_date: date of the commit
- author_name: name of the author of the commit
- message: commit message
- has_changes: whether the commit has changes
(can be False when loading subprojects)
- changed_paths: list of paths changed by the commit
"""
for log_entry in self.conn_log.iter_log(
for log_entry in self.remote_access().iter_log(
paths=None,
start=revision_start,
end=revision_end,
discover_changed_paths=True,
):
yield self.__to_entry(log_entry)
yield self._revision_data(log_entry)
@svn_retry()
def commit_info(self, revision: int) -> Optional[Dict]:
......@@ -267,22 +238,36 @@ class SvnRepo:
revision: svn revision to return commit info
Returns:
A dictionary filled with commit info, see :meth:`swh.loader.svn.svn.logs`
A dictionary filled with commit info, see :meth:`swh.loader.svn.svn_repo.logs`
for details about its content.
"""
return next(self.logs(revision, revision), None)
@svn_retry()
def remote_access(self, auth: Auth) -> RemoteAccess:
def remote_access(self) -> RemoteAccess:
"""Simple wrapper around subvertpy.ra.RemoteAccess creation
enabling to retry the operation if a network error occurs."""
return RemoteAccess(self.remote_url, auth=auth)
return RemoteAccess(self.remote_access_url, auth=self.auth)
@svn_retry()
def info(self, origin_url: str):
def info(
self,
origin_url: Optional[str] = None,
peg_revision: Optional[int] = None,
revision: Optional[int] = None,
):
"""Simple wrapper around subvertpy.client.Client.info enabling to retry
the command if a network error occurs."""
info = self.client.info(quote_svn_url(origin_url).rstrip("/"))
the command if a network error occurs.
Args:
origin_url: If provided, query info about a specific repository,
currently set origin URL will be used otherwise
"""
info = self.client.info(
quote_svn_url(origin_url or self.origin_url).rstrip("/"),
peg_revision=peg_revision,
revision=revision,
)
return next(iter(info.values()))
@svn_retry()
......@@ -296,6 +281,7 @@ class SvnRepo:
ignore_externals: bool = False,
overwrite: bool = False,
ignore_keywords: bool = False,
remove_dest_path: bool = True,
) -> int:
"""Simple wrapper around subvertpy.client.Client.export enabling to retry
the command if a network error occurs.
......@@ -303,11 +289,12 @@ class SvnRepo:
See documentation of svn_client_export5 function from subversion C API
to get details about parameters.
"""
# remove export path as command can be retried
if os.path.isfile(to) or os.path.islink(to):
os.remove(to)
elif os.path.isdir(to):
shutil.rmtree(to)
if remove_dest_path:
# remove export path as command can be retried
if os.path.isfile(to) or os.path.islink(to):
os.remove(to)
elif os.path.isdir(to):
shutil.rmtree(to)
options = []
if rev is not None:
options.append(f"-r {rev}")
......@@ -400,6 +387,13 @@ class SvnRepo:
See documentation of svn_client_propget5 function from subversion C API
to get details about parameters.
"""
logger.debug(
"svn propget %s%s %s%s",
"--recursive " if recurse else "",
name,
quote_svn_url(target),
f"@{peg_rev}" if peg_rev else "",
)
target_is_url = urlparse(target).scheme != ""
if target_is_url:
# subvertpy 0.11 has a buggy implementation of propget bindings when
......@@ -450,9 +444,25 @@ class SvnRepo:
# the right export URL,recursive externals are also checked
# get all svn:externals properties recursively
externals = self.propget(
"svn:externals", self.remote_url, revision, revision, True
)
if self.remote_url.startswith("file://"):
externals = self.propget(
"svn:externals", self.remote_url, revision, revision, True
)
else:
# recursive propget operation is terribly slow over the network,
# better doing it from a freshly checked out working copy as it is faster
with tempfile.TemporaryDirectory(
dir=self.local_dirname, prefix=f"checkout-revision-{revision}."
) as co_dirname:
self.checkout(
self.remote_url, co_dirname, revision, ignore_externals=True
)
# get all svn:externals properties recursively
externals = self.propget(
"svn:externals", co_dirname, None, None, True
)
self.has_relative_externals = False
self.has_recursive_externals = False
for path, external_defs in externals.items():
......@@ -467,6 +477,7 @@ class SvnRepo:
external_path,
external_url,
_,
_,
relative_url,
) = parse_external_definition(
external_def.rstrip("\r"), path, self.origin_url
......@@ -621,3 +632,33 @@ class SvnRepo:
if os.path.exists(dirname):
logger.debug("cleanup %s", dirname)
shutil.rmtree(dirname)
def get_head_revision_at_date(self, date: datetime) -> int:
    """Get HEAD revision number for a given date.

    The revision is searched by bisection over the commit dates of
    revisions 1 to HEAD, fetching commit info only for the probed revisions.

    Args:
        date: the reference date

    Returns:
        the revision number of the HEAD revision at that date

    Raises:
        ValueError: first revision date is greater than given date, or
            no commit info could be retrieved for the first revision
    """

    class RevisionList(Sequence[datetime]):
        """Read-only sequence mapping index ``i`` to the date of revision ``i + 1``."""

        def __init__(self, svn_repo):
            self.svn_repo = svn_repo
            # a range avoids materializing one int per revision of the repository
            self.rev_ids = range(1, self.svn_repo.head_revision() + 1)

        def __len__(self):
            return len(self.rev_ids)

        def __getitem__(self, i):
            commit_info = self.svn_repo.commit_info(self.rev_ids[i])
            return commit_info["author_date"].to_datetime()

    # commit_info is declared as returning an optional dict: fail with a
    # clear error instead of a TypeError when nothing is returned
    first_commit = self.commit_info(1)
    if first_commit is None:
        raise ValueError("No commit info could be retrieved for first revision")
    if first_commit["author_date"].to_datetime() > date:
        raise ValueError("First revision date is greater than reference date")

    # NOTE(review): bisection assumes revision dates are non-decreasing
    # with the revision number -- confirm this holds for loaded repositories.
    # The insertion point equals the revision number as revisions start at 1.
    return bisect.bisect_right(RevisionList(self), date)
......@@ -27,6 +27,7 @@ def is_retryable_svn_exception(exception):
"Unable to connect to a repository at URL",
"Error running context: The server unexpectedly closed the connection",
"ra_serf: The server sent a truncated HTTP response body",
"Unexpected HTTP status 504 'Gateway Time-out'",
)
)
return isinstance(exception, (ConnectionResetError, TimeoutError))
......
# Copyright (C) 2019-2022 The Software Heritage developers
# Copyright (C) 2019-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -8,11 +8,16 @@ from typing import Any, Dict
import pytest
from swh.loader.svn.loader import SvnRepo
from swh.loader.svn.loader import SvnLoader, SvnLoaderFromRemoteDump, SvnRepo
from .utils import create_repo
@pytest.fixture(params=[SvnLoader, SvnLoaderFromRemoteDump])
def svn_loader_cls(request):
    """Return the loader class selected by the fixture parametrization."""
    loader_class = request.param
    return loader_class
@pytest.fixture
def swh_storage_backend_config(swh_storage_backend_config):
"""Basic pg storage configuration with no journal collaborator
......
This diff is collapsed.
This diff is collapsed.
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timedelta, timezone
import gc
import os
import pytest
from swh.loader.svn.svn_repo import SvnRepo
from .utils import CommitChange, CommitChangeType, add_commit
FIRST_COMMIT_DATE = datetime(2019, 1, 1, tzinfo=timezone.utc)
NB_DAYS_BETWEEN_COMMITS = 2

# One commit description per file name: each adds trunk/<name> with the name
# as content, dated NB_DAYS_BETWEEN_COMMITS days after the previous one.
COMMITS = [
    {
        "message": f"Create trunk/{filename} file",
        "date": FIRST_COMMIT_DATE
        + index * timedelta(days=NB_DAYS_BETWEEN_COMMITS),
        "changes": [
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path=f"trunk/{filename}",
                data=filename.encode(),
            ),
        ],
    }
    for index, filename in enumerate(("foo", "bar", "baz"))
]
@pytest.fixture
def repo_url(repo_url):
    """Call ``add_commit`` for each entry of COMMITS, then return the URL."""
    for commit in COMMITS:
        message = commit["message"]
        changes = commit["changes"]
        date = commit["date"]
        add_commit(repo_url, message, changes, date)
    return repo_url
def test_svn_repo_temp_dir_cleanup(repo_url):
    """The repo's local directory must disappear once the object is collected."""
    repo = SvnRepo(repo_url)
    managed_dir = repo.local_dirname
    assert os.path.exists(managed_dir)

    # drop the only reference and force a collection so the finalizer runs
    del repo
    gc.collect()

    assert not os.path.exists(managed_dir)
@pytest.fixture
def svn_repo(repo_url):
    """Build a SvnRepo from the repository URL only."""
    repo = SvnRepo(repo_url)
    return repo
def test_svn_repo_head_revision(svn_repo):
    """HEAD revision number equals the number of commits in COMMITS."""
    expected_head = len(COMMITS)
    assert svn_repo.head_revision() == expected_head
def _assert_commit(i, commit):
    """Assert that revision data ``commit`` matches ``COMMITS[i]`` (revision ``i + 1``)."""
    assert commit["rev"] == i + 1
    expected = COMMITS[i]
    # messages are compared as bytes
    assert commit["message"] == expected["message"].encode()
    assert commit["has_changes"]
    assert commit["changed_paths"]
    assert commit["author_date"].to_datetime() == expected["date"]
def test_svn_repo_logs(svn_repo):
    """Each entry streamed by ``logs`` for revisions 1..len(COMMITS) matches its commit."""
    entries = svn_repo.logs(1, len(COMMITS))
    for index, entry in enumerate(entries):
        _assert_commit(index, entry)
def test_svn_repo_commit_info(svn_repo):
    """``commit_info`` queried one revision at a time matches each commit."""
    for index, _ in enumerate(COMMITS):
        revision_data = svn_repo.commit_info(index + 1)
        _assert_commit(index, revision_data)
def test_svn_repo_info(svn_repo):
    """``info`` called without arguments reports the origin URL and HEAD revision."""
    repo_info = svn_repo.info()
    assert repo_info.url == svn_repo.origin_url
    assert repo_info.repos_root_url == svn_repo.origin_url
    assert repo_info.revision == len(COMMITS)
def test_svn_repo_get_head_revision_at_date(svn_repo):
    """Check HEAD revision lookup at, shortly before and shortly after each commit date."""
    # strictly smaller than the gap between two commits
    offset = timedelta(days=NB_DAYS_BETWEEN_COMMITS - 1)

    for index, commit in enumerate(COMMITS):
        commit_date = commit["date"]
        assert svn_repo.get_head_revision_at_date(commit_date) == index + 1

        if index == 0:
            # no revision exists before the first commit
            with pytest.raises(
                ValueError, match="First revision date is greater than reference date"
            ):
                svn_repo.get_head_revision_at_date(commit_date - offset)
        else:
            assert svn_repo.get_head_revision_at_date(commit_date - offset) == index

        assert svn_repo.get_head_revision_at_date(commit_date + offset) == index + 1
# Copyright (C) 2022 The Software Heritage developers
# Copyright (C) 2022-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -9,7 +9,7 @@ import pytest
from subvertpy import SubversionException
from subvertpy.ra import Auth, RemoteAccess, get_username_provider
from swh.loader.svn.svn import SvnRepo
from swh.loader.svn.svn_repo import SvnRepo
from swh.loader.svn.svn_retry import SVN_RETRY_MAX_ATTEMPTS, SVN_RETRY_WAIT_EXP_BASE
from swh.loader.tests import prepare_repository_from_archive
......@@ -105,6 +105,7 @@ RETRYABLE_EXCEPTIONS = [
SubversionException(
"ra_serf: The server sent a truncated HTTP response body.", 120106
),
SubversionException("Unexpected HTTP status 504 'Gateway Time-out'", 175002),
ConnectionResetError(),
TimeoutError(),
]
......@@ -273,7 +274,7 @@ def test_remote_access_retry_success(
):
nb_failed_calls = 2
mock_ra = mocker.patch("swh.loader.svn.svn.RemoteAccess")
mock_ra = mocker.patch("swh.loader.svn.svn_repo.RemoteAccess")
remote_access = RemoteAccess(sample_repo_url, auth=Auth([get_username_provider()]))
mock_ra.side_effect = (
[exception_to_retry] * nb_failed_calls
......@@ -300,7 +301,7 @@ def test_remote_access_retry_failure(
):
nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS
mock_ra = mocker.patch("swh.loader.svn.svn.RemoteAccess")
mock_ra = mocker.patch("swh.loader.svn.svn_repo.RemoteAccess")
remote_access = RemoteAccess(sample_repo_url, auth=Auth([get_username_provider()]))
mock_ra.side_effect = (
[exception_to_retry] * nb_failed_calls
......@@ -371,9 +372,10 @@ def test_svn_commit_info_retry_success(
mock_sleep = mocker.patch.object(svnrepo.commit_info.retry, "sleep")
nb_failed_calls = 2
svnrepo.conn_log = SVNRemoteAccessWrapper(
svnrepo.conn_log, exception_to_retry, nb_failed_calls
remote_access = SVNRemoteAccessWrapper(
svnrepo.remote_access(), exception_to_retry, nb_failed_calls
)
svnrepo.remote_access = lambda *args: remote_access
commit = svnrepo.commit_info(revision=1)
assert commit
......@@ -392,9 +394,10 @@ def test_svn_commit_info_retry_failure(
mock_sleep = mocker.patch.object(svnrepo.commit_info.retry, "sleep")
nb_failed_calls = SVN_RETRY_MAX_ATTEMPTS
svnrepo.conn_log = SVNRemoteAccessWrapper(
svnrepo.conn_log, exception_to_retry, nb_failed_calls
remote_access = SVNRemoteAccessWrapper(
svnrepo.remote_access(), exception_to_retry, nb_failed_calls
)
svnrepo.remote_access = lambda *args: remote_access
with pytest.raises(type(exception_to_retry)):
svnrepo.commit_info(sample_repo_url)
......
# Copyright (C) 2016-2022 The Software Heritage developers
# Copyright (C) 2016-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timedelta, timezone
import logging
import os
from pathlib import Path
......@@ -16,6 +17,8 @@ import pytest
from swh.loader.svn import utils
from swh.loader.tests import prepare_repository_from_archive
from .utils import CommitChange, CommitChangeType, add_commit
def test_outputstream():
stdout_r, stdout_w = pty.openpty()
......@@ -221,13 +224,19 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"third-party/sounds http://svn.example.com/repos/sounds",
"trunk/externals",
"http://svn.example.org/repos/test",
("third-party/sounds", "http://svn.example.com/repos/sounds", None, False),
(
"third-party/sounds",
"http://svn.example.com/repos/sounds",
None,
None,
False,
),
),
(
"third-party/skins -r148 http://svn.example.com/skinproj",
"trunk/externals",
"http://svn.example.org/repos/test",
("third-party/skins", "http://svn.example.com/skinproj", 148, False),
("third-party/skins", "http://svn.example.com/skinproj", 148, None, False),
),
(
"third-party/skins/toolkit -r21 http://svn.example.com/skin-maker",
......@@ -237,6 +246,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"third-party/skins/toolkit",
"http://svn.example.com/skin-maker",
21,
None,
False,
),
),
......@@ -245,13 +255,19 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
" http://svn.example.com/repos/sounds third-party/sounds",
"trunk/externals",
"http://svn.example.org/repos/test",
("third-party/sounds", "http://svn.example.com/repos/sounds", None, False),
(
"third-party/sounds",
"http://svn.example.com/repos/sounds",
None,
None,
False,
),
),
(
"-r148 http://svn.example.com/skinproj third-party/skins",
"trunk/externals",
"http://svn.example.org/repos/test",
("third-party/skins", "http://svn.example.com/skinproj", 148, False),
("third-party/skins", "http://svn.example.com/skinproj", 148, None, False),
),
(
"-r 21 http://svn.example.com/skin-maker third-party/skins/toolkit",
......@@ -261,6 +277,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"third-party/skins/toolkit",
"http://svn.example.com/skin-maker",
21,
None,
False,
),
),
......@@ -268,13 +285,19 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"http://svn.example.com/repos/sounds third-party/sounds",
"trunk/externals",
"http://svn.example.org/repos/test",
("third-party/sounds", "http://svn.example.com/repos/sounds", None, False),
(
"third-party/sounds",
"http://svn.example.com/repos/sounds",
None,
None,
False,
),
),
(
"http://svn.example.com/skinproj@148 third-party/skins",
"trunk/externals",
"http://svn.example.org/repos/test",
("third-party/skins", "http://svn.example.com/skinproj", 148, False),
("third-party/skins", "http://svn.example.com/skinproj", None, 148, False),
),
(
"http://anon:anon@svn.example.com/skin-maker@21 third-party/skins/toolkit",
......@@ -283,6 +306,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
(
"third-party/skins/toolkit",
"http://anon:anon@svn.example.com/skin-maker",
None,
21,
False,
),
......@@ -295,6 +319,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"third-party/skins/toolkit",
"http://anon:anon@svn.example.com/skin-maker",
21,
None,
False,
),
),
......@@ -306,6 +331,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"third-party/skins/toolkit",
"http://anon:anon@svn.example.com/skin-maker",
21,
21,
False,
),
),
......@@ -318,6 +344,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"third-party/sounds",
"http://svn.example.org/repos/test/sounds",
None,
None,
False,
),
),
......@@ -325,7 +352,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"/skinproj@148 third-party/skins",
"trunk/externals",
"http://svn.example.org/repos/test",
("third-party/skins", "http://svn.example.org/skinproj", 148, True),
("third-party/skins", "http://svn.example.org/skinproj", None, 148, True),
),
(
"//svn.example.com/skin-maker@21 third-party/skins/toolkit",
......@@ -334,6 +361,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
(
"third-party/skins/toolkit",
"http://svn.example.com/skin-maker",
None,
21,
True,
),
......@@ -345,6 +373,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
(
"third-party/skins/toolkit",
"http://svn.example.org/skin-maker",
None,
21,
True,
),
......@@ -353,20 +382,38 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"../skins skins",
"trunk/externals",
"http://svn.example.org/repos/test",
("skins", "http://svn.example.org/repos/test/trunk/skins", None, False),
(
"skins",
"http://svn.example.org/repos/test/trunk/skins",
None,
None,
False,
),
),
(
"../skins skins",
"trunk/externals",
"http://svn.example.org/repos/test",
("skins", "http://svn.example.org/repos/test/trunk/skins", None, False),
(
"skins",
"http://svn.example.org/repos/test/trunk/skins",
None,
None,
False,
),
),
# subversion >= 1.6
(
'http://svn.thirdparty.com/repos/My%20Project "My Project"',
"trunk/externals",
"http://svn.example.org/repos/test",
("My Project", "http://svn.thirdparty.com/repos/My%20Project", None, False),
(
"My Project",
"http://svn.thirdparty.com/repos/My%20Project",
None,
None,
False,
),
),
(
'http://svn.thirdparty.com/repos/My%20%20%20Project "My Project"',
......@@ -376,6 +423,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"My Project",
"http://svn.thirdparty.com/repos/My%20%20%20Project",
None,
None,
False,
),
),
......@@ -387,6 +435,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
'"Quotes Too"',
"http://svn.thirdparty.com/repos/%22Quotes%20Too%22",
None,
None,
False,
),
),
......@@ -398,6 +447,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
'"Quotes Too"',
"http://svn.thirdparty.com/repos/%22Quotes%20%20%20Too%22",
None,
None,
False,
),
),
......@@ -406,49 +456,61 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
'-r1 http://svn.thirdparty.com/repos/test "trunk/PluginFramework"',
"trunk/externals",
"http://svn.example.org/repos/test",
("trunk/PluginFramework", "http://svn.thirdparty.com/repos/test", 1, False),
(
"trunk/PluginFramework",
"http://svn.thirdparty.com/repos/test",
1,
None,
False,
),
),
(
"external -r 9 http://svn.thirdparty.com/repos/test",
"tags",
"http://svn.example.org/repos/test",
("external", "http://svn.thirdparty.com/repos/test", 9, False),
("external", "http://svn.thirdparty.com/repos/test", 9, None, False),
),
(
"./external http://svn.thirdparty.com/repos/test",
"tags",
"http://svn.example.org/repos/test",
("external", "http://svn.thirdparty.com/repos/test", None, False),
("external", "http://svn.thirdparty.com/repos/test", None, None, False),
),
(
".external http://svn.thirdparty.com/repos/test",
"tags",
"http://svn.example.org/repos/test",
(".external", "http://svn.thirdparty.com/repos/test", None, False),
(".external", "http://svn.thirdparty.com/repos/test", None, None, False),
),
(
"external/ http://svn.thirdparty.com/repos/test",
"tags",
"http://svn.example.org/repos/test",
("external", "http://svn.thirdparty.com/repos/test", None, False),
("external", "http://svn.thirdparty.com/repos/test", None, None, False),
),
(
"external ttp://svn.thirdparty.com/repos/test",
"tags",
"http://svn.example.org/repos/test",
("external", "ttp://svn.thirdparty.com/repos/test", None, False),
("external", "ttp://svn.thirdparty.com/repos/test", None, None, False),
),
(
"external http//svn.thirdparty.com/repos/test",
"tags",
"http://svn.example.org/repos/test",
("external", "http//svn.thirdparty.com/repos/test", None, False),
("external", "http//svn.thirdparty.com/repos/test", None, None, False),
),
(
"C:\\code\\repo\\external http://svn.thirdparty.com/repos/test",
"tags",
"http://svn.example.org/repos/test",
("C:coderepoexternal", "http://svn.thirdparty.com/repos/test", None, False),
(
"C:coderepoexternal",
"http://svn.thirdparty.com/repos/test",
None,
None,
False,
),
),
(
"C:\\\\code\\\\repo\\\\external http://svn.thirdparty.com/repos/test",
......@@ -458,6 +520,7 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"C:\\code\\repo\\external",
"http://svn.thirdparty.com/repos/test",
None,
None,
False,
),
),
......@@ -465,13 +528,25 @@ def test_svn_urljoin(base_url, paths_to_join, expected_result):
"-r 123 http://svn.example.com/repos/sounds@100 third-party/sounds",
"trunk/externals",
"http://svn.example.org/repos/test",
("third-party/sounds", "http://svn.example.com/repos/sounds", 123, False),
(
"third-party/sounds",
"http://svn.example.com/repos/sounds",
123,
100,
False,
),
),
(
"-r 123 http://svn.example.com/repos/sounds@150 third-party/sounds",
"trunk/externals",
"http://svn.example.org/repos/test",
("third-party/sounds", "http://svn.example.com/repos/sounds", 123, False),
(
"third-party/sounds",
"http://svn.example.com/repos/sounds",
123,
150,
False,
),
),
],
)
......@@ -492,3 +567,68 @@ def test_parse_invalid_external_definition(invalid_external):
utils.parse_external_definition(
invalid_external, "/trunk/externals", "http://svn.example.org/repo"
)
# Timezone-aware (UTC) dates assigned, in order, to the three commits created
# by the repo_url fixture below.
FIRST_COMMIT_DATE = datetime(year=2020, month=7, day=14, tzinfo=timezone.utc)
SECOND_COMMIT_DATE = FIRST_COMMIT_DATE + timedelta(minutes=10)
THIRD_COMMIT_DATE = SECOND_COMMIT_DATE + timedelta(hours=1)
@pytest.fixture
def repo_url(repo_url):
    """Extend the inherited ``repo_url`` fixture with three dated commits.

    The commits successively add ``trunk/foo/foo``, add ``trunk/bar/bar`` and
    delete ``trunk/foo/``; they are dated ``FIRST_COMMIT_DATE``,
    ``SECOND_COMMIT_DATE`` and ``THIRD_COMMIT_DATE`` respectively.

    Returns:
        the repository URL received from the overridden fixture
    """
    # (commit message, single change of the commit, commit date)
    dated_commits = (
        (
            "Add trunk/foo/foo path",
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/foo/foo",
                data=b"foo",
            ),
            FIRST_COMMIT_DATE,
        ),
        (
            "Add trunk/bar/bar path",
            CommitChange(
                change_type=CommitChangeType.AddOrUpdate,
                path="trunk/bar/bar",
                data=b"bar",
            ),
            SECOND_COMMIT_DATE,
        ),
        (
            "Remove trunk/foo/foo path",
            CommitChange(
                change_type=CommitChangeType.Delete,
                path="trunk/foo/",
            ),
            THIRD_COMMIT_DATE,
        ),
    )
    for message, change, commit_date in dated_commits:
        add_commit(repo_url, message, [change], commit_date)
    return repo_url
def test_get_repo_root_url(repo_url):
    """Check the repository root URL is found from the root and from sub-paths.

    ``trunk/foo/foo`` has been removed by the last commit of the ``repo_url``
    fixture while ``trunk/bar/bar`` still exists, so both a deleted and a
    live sub-path are exercised.
    """
    # the comparisons must be asserted, a bare ``a == b`` statement is
    # evaluated and discarded so the test could never fail
    assert utils.get_repo_root_url(repo_url) == repo_url
    assert utils.get_repo_root_url(f"{repo_url}/trunk/foo/foo") == repo_url
    assert utils.get_repo_root_url(f"{repo_url}/trunk/bar/bar") == repo_url
def test_get_head_revision_at_date(repo_url):
    """Check the HEAD revision number resolved for various reference dates.

    Dates equal to a commit date are expected to resolve to that commit's
    revision, dates falling between two commits to the earlier one.
    """
    # the comparisons must be asserted, a bare ``a == b`` statement is
    # evaluated and discarded so the test could never fail
    assert utils.get_head_revision_at_date(repo_url, FIRST_COMMIT_DATE) == 1
    assert utils.get_head_revision_at_date(repo_url, SECOND_COMMIT_DATE) == 2
    assert utils.get_head_revision_at_date(repo_url, THIRD_COMMIT_DATE) == 3

    # midpoint between first and second commits
    assert (
        utils.get_head_revision_at_date(
            repo_url, FIRST_COMMIT_DATE + (SECOND_COMMIT_DATE - FIRST_COMMIT_DATE) / 2
        )
        == 1
    )

    # midpoint between second and third commits
    assert (
        utils.get_head_revision_at_date(
            repo_url, SECOND_COMMIT_DATE + (THIRD_COMMIT_DATE - SECOND_COMMIT_DATE) / 2
        )
        == 2
    )
# Copyright (C) 2022 The Software Heritage developers
# Copyright (C) 2022-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime
from enum import Enum
from io import BytesIO
import os
from typing import Dict, List
from typing import Dict, List, Optional
from subvertpy import SubversionException, delta, repos
from subvertpy.ra import Auth, RemoteAccess, get_username_provider
......@@ -27,7 +28,12 @@ class CommitChange(TypedDict, total=False):
copyfrom_rev: int
def add_commit(repo_url: str, message: str, changes: List[CommitChange]) -> None:
def add_commit(
repo_url: str,
message: str,
changes: List[CommitChange],
date: Optional[datetime] = None,
) -> None:
conn = RemoteAccess(repo_url, auth=Auth([get_username_provider()]))
editor = conn.get_commit_editor({"svn:log": message})
root = editor.open_root()
......@@ -71,8 +77,24 @@ def add_commit(repo_url: str, message: str, changes: List[CommitChange]) -> None
root.close()
editor.close()
if date is not None:
conn.change_rev_prop(
conn.get_latest_revnum(),
"svn:date",
date.strftime("%Y-%m-%dT%H:%M:%S.%fZ").encode(),
)
def create_repo(tmp_path, repo_name="tmprepo"):
    """Create a subversion repository on disk and return its ``file://`` URL.

    Args:
        tmp_path: directory in which the repository is created
        repo_name: name of the repository directory inside ``tmp_path``

    Returns:
        the ``file://`` URL of the created repository
    """
    repo_path = os.path.join(tmp_path, repo_name)
    repos.create(repo_path)

    # install passthrough hooks (always exiting with 0) to allow modifying
    # revision properties like svn:date
    passthrough_script = b"#!/bin/sh\n\nexit 0"
    for hook_name in ("pre-revprop-change", "post-revprop-change"):
        hook_file = f"{repo_path}/hooks/{hook_name}"
        with open(hook_file, "wb") as hook:
            hook.write(passthrough_script)
        os.chmod(hook_file, 0o775)

    return f"file://{repo_path}"
# Copyright (C) 2016-2022 The Software Heritage developers
# Copyright (C) 2016-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime
import errno
from functools import lru_cache
import logging
import os
import re
......@@ -13,6 +15,9 @@ import tempfile
from typing import Optional, Tuple
from urllib.parse import quote, urlparse, urlunparse
import iso8601
from subvertpy import SubversionException
logger = logging.getLogger(__name__)
......@@ -111,7 +116,13 @@ def init_svn_repo_from_dump(
# load dump and bypass properties validation as Unicode decoding errors
# are already handled in loader implementation (see _ra_codecs_error_handler
# in ra.py)
cmd = ["svnadmin", "load", "-q", "--bypass-prop-validation"]
cmd = [
"svnadmin",
"load",
"-q",
"--bypass-prop-validation",
"--no-flush-to-disk", # loading is significantly faster with this option
]
if max_rev > 0:
cmd.append(f"-r1:{max_rev}")
cmd.append(repo_path)
......@@ -212,7 +223,7 @@ def svn_urljoin(base_url: str, *args) -> str:
def parse_external_definition(
external: str, dir_path: str, repo_url: str
) -> Tuple[str, str, Optional[int], bool]:
) -> Tuple[str, str, Optional[int], Optional[int], bool]:
"""Parse a subversion external definition.
Args:
......@@ -228,6 +239,7 @@ def parse_external_definition(
- path relative to dir_path where the external should be exported
- URL of the external to export
- optional revision of the external to export
- optional peg revision of the external to export
- boolean indicating if the external URL is relative to the repository
URL and targets a path not in the repository
......@@ -235,6 +247,7 @@ def parse_external_definition(
path = ""
external_url = ""
revision = None
peg_revision = None
relative_url = False
prev_part = None
# turn multiple spaces into a single one and split on space
......@@ -303,18 +316,30 @@ def parse_external_definition(
url, revision_s = external_url.rsplit("@", maxsplit=1)
try:
# ensure revision_s can be parsed to int
rev = int(revision_s)
# -r XXX takes priority over <svn_url>@XXX
revision = revision or rev
peg_revision = int(revision_s)
external_url = url
except ValueError:
# handle URL like http://user@svn.example.org/
pass
if urlparse(external_url).username is None:
# handle URL like http://user@svn.example.org/
external_url = url
if revision_s.startswith("{") and revision_s.endswith("}"):
# revision as a date special case, subvertpy does not support such revision
# format in its API so we need to get the HEAD revision number at that date
try:
date = iso8601.parse_date(revision_s[1:-1])
repo_root_url = get_repo_root_url(external_url)
peg_revision = get_head_revision_at_date(repo_root_url, date)
except Exception as e:
# typically when repository no longer exists or temporary network failures,
# for the latter case if the loader did not export the external at the right
# revision it will detect it at next origin visit and perform a full reload.
logger.debug(e)
pass
if not external_url or not path:
raise ValueError(f"Failed to parse external definition '{external}'")
return (path.rstrip("/"), external_url, revision, relative_url)
return path.rstrip("/"), external_url, revision, peg_revision, relative_url
def is_recursive_external(
......@@ -342,3 +367,65 @@ def is_recursive_external(
return svn_urljoin(origin_url, quote(dir_path), quote(external_path)).startswith(
external_url
)
@lru_cache()
def get_head_revision_at_date(svn_url: str, date: datetime) -> int:
    """Return the HEAD revision number of a repository at a given date.

    Thin cached wrapper around
    :meth:`swh.loader.svn.svn_repo.SvnRepo.get_head_revision_at_date`:
    results are memoized per ``(svn_url, date)`` pair.

    Args:
        svn_url: URL of subversion repository
        date: the reference date

    Returns:
        the revision number of the HEAD revision at that date

    Raises:
        SubversionException: repository URL is not valid
        ValueError: first revision date is greater than given date
    """
    # imported at call time rather than at module level
    # NOTE(review): presumably to avoid a circular import with svn_repo - confirm
    from swh.loader.svn.svn_repo import SvnRepo

    svn_repo = SvnRepo(svn_url)
    return svn_repo.get_head_revision_at_date(date)
@lru_cache()
def _get_repo_root_url(svn_url: str) -> str:
    """Return the ``repos_root_url`` of the repository at ``svn_url``.

    Results are memoized per URL.
    """
    # imported at call time rather than at module level
    # NOTE(review): presumably to avoid a circular import with svn_repo - confirm
    from swh.loader.svn.svn_repo import SvnRepo

    svn_repo = SvnRepo(svn_url)
    return svn_repo.repos_root_url
def get_repo_root_url(svn_url):
    """Get root URL for a repository.

    A Subversion URL might target a sub-project in a repository.
    The URL is tried as is first, then with its trailing path components
    removed one at a time, until the root URL of the repository can be
    computed. Results are put in cache for each attempted URL.

    Args:
        svn_url: URL of subversion repository

    Returns:
        the root URL of the repository, or ``svn_url`` unchanged when it
        could not be computed from any of the attempted URLs (the
        ``SubversionException`` raised by each attempt is swallowed)
    """
    components = svn_url.split("/")
    # walk from the full URL down to its first "/"-separated component
    for nb_kept in range(len(components), 0, -1):
        candidate_url = "/".join(components[:nb_kept])
        try:
            return _get_repo_root_url(candidate_url)
        except SubversionException:
            # URL no longer valid, retry by removing last sub-path in it as
            # the targeted path might no longer exist in HEAD revision
            continue
    return svn_url
[tox]
requires =
tox>4
envlist=black,flake8,mypy,py3
[testenv]
......@@ -33,7 +35,7 @@ commands =
extras =
testing
deps =
mypy==0.942
mypy==1.0.1
commands =
mypy swh
......@@ -41,14 +43,13 @@ commands =
# git HEAD of swh-docs, is executed on CI for each diff to prevent
# breaking doc build
[testenv:sphinx]
whitelist_externals = make
allowlist_externals = make
usedevelop = true
extras =
testing
deps =
# fetch and install swh-docs in develop mode
-e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs
-e git+https://gitlab.softwareheritage.org/swh/devel/swh-docs.git\#egg=swh.docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
......@@ -56,18 +57,16 @@ setenv =
commands =
make -I ../.tox/sphinx/src/swh-docs/swh/ -C docs
# build documentation only inside swh-environment using local state
# of swh-docs package
[testenv:sphinx-dev]
whitelist_externals = make
allowlist_externals = make
usedevelop = true
extras =
testing
deps =
# install swh-docs in develop mode
-e ../swh-docs
setenv =
SWH_PACKAGE_DOC_TOX_BUILD = 1
# turn warnings into errors
......