Skip to content
Snippets Groups Projects
Verified Commit e9dab3f3 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

swh.loader.svn: Target last known snapshot when error is raised

parent 950b8b25
No related branches found
No related tags found
No related merge requests found
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
DEFAULT_BRANCH = b'master'
......@@ -19,10 +19,12 @@ from swh.model.identifiers import snapshot_identifier
from swh.loader.core.loader import SWHLoader
from . import svn, converters
from .config import DEFAULT_BRANCH
from .utils import init_svn_repo_from_archive_dump
DEFAULT_BRANCH = b'master'
def _revision_id(revision):
    """Compute the identifier of ``revision`` and return it as bytes.

    Args:
        revision: revision passed as-is to ``revision_identifier``.

    Returns:
        The result of ``identifier_to_bytes`` applied to that identifier.

    """
    rev_identifier = revision_identifier(revision)
    return identifier_to_bytes(rev_identifier)
......@@ -57,14 +59,13 @@ class SvnLoaderUneventful(ValueError):
snapshot.
"""
def __init__(self, e, snapshot):
def __init__(self, e):
super().__init__(e)
self.snapshot = snapshot
class SvnLoaderHistoryAltered(ValueError):
def __init__(self, e, *args):
super().__init__(e, *args)
def __init__(self, e):
super().__init__(e)
class SWHSvnLoader(SWHLoader):
......@@ -127,6 +128,49 @@ class SWHSvnLoader(SWHLoader):
svn_url, origin['id'], self.storage,
destination_path=destination_path)
def swh_latest_snapshot_revision(self, origin_id,
                                 previous_swh_revision=None):
    """Look for the latest snapshot revision and return it if any.

    Args:
        origin_id (int): Origin identifier
        previous_swh_revision: (optional) id of a possible
            previous swh revision. When not provided, the revision
            targeted by the DEFAULT_BRANCH branch of the latest
            snapshot is used instead.

    Returns:
        dict: The latest known point in time. Dict with keys:

            'revision': latest visited revision
            'snapshot': latest snapshot, as returned by the storage
                (may be None if the origin has no snapshot but
                ``previous_swh_revision`` was resolved)

        If nothing is found, return an empty dict.

    """
    storage = self.storage

    # Always look the snapshot up front: it is part of the returned dict
    # even when the caller provides `previous_swh_revision`. Binding it
    # only in the "no previous revision" branch left the name undefined
    # (UnboundLocalError) on the final return.
    latest_snap = storage.snapshot_get_latest(origin_id)

    if not previous_swh_revision:
        # No explicit starting point: derive it from the latest snapshot.
        if not latest_snap:
            return {}
        branches = latest_snap.get('branches')
        if not branches:
            return {}
        branch = branches.get(DEFAULT_BRANCH)
        if not branch:
            return {}
        if branch['target_type'] != 'revision':
            return {}
        previous_swh_revision = branch['target']

    revs = list(storage.revision_get([previous_swh_revision]))
    if not revs:
        return {}
    return {
        'snapshot': latest_snap,
        'revision': revs[0],
    }
def build_swh_revision(self, rev, commit, dir_id, parents):
"""Build the swh revision dictionary.
......@@ -152,12 +196,6 @@ class SWHSvnLoader(SWHLoader):
dir_id,
parents)
def swh_latest_snapshot_revision(self, prev_swh_revision=None):
    """Retrieve swh's previous revision if any.

    Delegates to the ``swh_latest_snapshot_revision`` method of the svn
    repository object held in ``self.svnrepo`` and returns its result
    unchanged.

    """
    repo = self.svnrepo
    return repo.swh_latest_snapshot_revision(prev_swh_revision)
def check_history_not_altered(self, svnrepo, revision_start, swh_rev):
"""Given a svn repository, check if the history was not tampered with.
......@@ -202,9 +240,8 @@ class SWHSvnLoader(SWHLoader):
if not start_from_scratch:
# Check if we already know a previous revision for that origin
latest_snapshot = self.swh_latest_snapshot_revision()
if latest_snapshot:
swh_rev = latest_snapshot['revision']
if self.latest_snapshot:
swh_rev = self.latest_snapshot['revision']
else:
swh_rev = None
......@@ -227,10 +264,10 @@ class SWHSvnLoader(SWHLoader):
svnrepo,
revision_start,
swh_rev):
msg = 'History of svn %s@%s history modified. ' \
msg = 'History of svn %s@%s altered. ' \
'Skipping...' % (
svnrepo.remote_url, revision_start)
raise SvnLoaderHistoryAltered(msg, *self.args)
raise SvnLoaderHistoryAltered(msg)
# now we know history is ok, we start at next revision
revision_start = revision_start + 1
......@@ -246,7 +283,7 @@ class SWHSvnLoader(SWHLoader):
if revision_start > revision_end and revision_start is not 1:
msg = '%s@%s already injected.' % (svnrepo.remote_url,
revision_end)
raise SvnLoaderUneventful(msg, latest_snapshot['snapshot'])
raise SvnLoaderUneventful(msg)
self.log.info('Processing %s.' % svnrepo)
......@@ -398,6 +435,9 @@ class SWHSvnLoader(SWHLoader):
else:
self.last_known_swh_revision = None
self.latest_snapshot = self.swh_latest_snapshot_revision(
self.origin_id, self.last_known_swh_revision)
self.svnrepo = self.get_svn_repo(svn_url, destination_path, origin)
def get_origin(self):
......@@ -436,15 +476,18 @@ class SWHSvnLoader(SWHLoader):
start_from_scratch=self.start_from_scratch)
except SvnLoaderUneventful as e: # uneventful visit
self.log.info('Uneventful visit. Detail: %s' % e)
# still targets the latest snapshot
self.process_swh_snapshot(revision=None, snapshot=e.snapshot)
if self.latest_snapshot and 'snapshot' in self.latest_snapshot:
snapshot = self.latest_snapshot['snapshot']
self.process_swh_snapshot(snapshot=snapshot)
except SvnLoaderEventful as e:
self.log.error('Eventful partial visit. Detail: %s' % e)
latest_rev = e.swh_revision
self.process_swh_snapshot(revision=latest_rev)
raise
except SvnLoaderHistoryAltered as e:
self.log.error('History altered. Detail: %s' % e)
except Exception as e:
if self.latest_snapshot and 'snapshot' in self.latest_snapshot:
snapshot = self.latest_snapshot['snapshot']
self.process_swh_snapshot(snapshot=snapshot)
raise
else:
self.process_swh_snapshot(revision=latest_rev)
......
......@@ -18,7 +18,6 @@ from subvertpy import client, properties
from swh.model.from_disk import Directory
from .config import DEFAULT_BRANCH
from . import ra, converters
# When log message contains empty data
......@@ -216,47 +215,6 @@ class BaseSvnRepo():
self.remote_url, to=local_url, rev=revision, ignore_keywords=True)
return local_dirname, os.fsencode(local_url)
def swh_latest_snapshot_revision(self, previous_swh_revision=None):
    """Look for the latest snapshot revision and return it if any.

    The origin is the one identified by ``self.origin_id``.

    Args:
        previous_swh_revision: (optional) id of a possible
            previous swh revision. When not provided, the revision
            targeted by the DEFAULT_BRANCH branch of the latest
            snapshot is used instead.

    Returns:
        dict: The latest known point in time. Dict with keys:

            'revision': latest visited revision
            'snapshot': latest snapshot, as returned by the storage
                (may be None if the origin has no snapshot but
                ``previous_swh_revision`` was resolved)

        If nothing is found, return an empty dict.

    """
    storage = self.storage

    # Fetch the snapshot unconditionally: it is returned alongside the
    # revision even when the caller supplies `previous_swh_revision`.
    # Binding it only in the fallback branch left the name undefined
    # (UnboundLocalError) on the final return.
    latest_snap = storage.snapshot_get_latest(self.origin_id)

    if not previous_swh_revision:
        # No explicit starting point: derive it from the latest snapshot.
        if not latest_snap:
            return {}
        branches = latest_snap.get('branches')
        if not branches:
            return {}
        branch = branches.get(DEFAULT_BRANCH)
        if not branch:
            return {}
        if branch['target_type'] != 'revision':
            return {}
        previous_swh_revision = branch['target']

    revs = list(storage.revision_get([previous_swh_revision]))
    if not revs:
        return {}
    return {
        'snapshot': latest_snap,
        'revision': revs[0],
    }
def swh_hash_data_per_revision(self, start_revision, end_revision):
"""Compute swh hash data per each revision between start_revision and
end_revision.
......
......@@ -9,8 +9,7 @@ from unittest import TestCase
from swh.model import hashutil
from swh.loader.svn.config import DEFAULT_BRANCH
from swh.loader.svn.loader import build_swh_snapshot
from swh.loader.svn.loader import build_swh_snapshot, DEFAULT_BRANCH
from swh.loader.svn.loader import SWHSvnLoader, SvnLoaderEventful
from swh.loader.svn.loader import SvnLoaderHistoryAltered, SvnLoaderUneventful
......@@ -93,9 +92,12 @@ class TestSvnLoader:
def close_success(self):
    """Do nothing: this test override intentionally has no effect."""
# Override to only prepare the svn repository
def prepare(self, *args, **kwargs):
    """Prepare only the svn repository and the latest known snapshot.

    Positional arguments are forwarded to ``get_svn_repo``; keyword
    arguments are accepted but unused. The latest snapshot is looked up
    through ``swh_latest_snapshot_revision`` with a fixed origin id of 10
    and no previous revision.

    """
    fixed_origin_id = 10
    self.svnrepo = self.get_svn_repo(*args)
    self.latest_snapshot = self.swh_latest_snapshot_revision(
        fixed_origin_id, None)
class SWHSvnLoaderNoStorage(TestSvnLoader, SWHSvnLoader):
......@@ -105,7 +107,7 @@ class SWHSvnLoaderNoStorage(TestSvnLoader, SWHSvnLoader):
Load a new svn repository using the swh policy (so no update).
"""
def swh_latest_snapshot_revision(self, prev_swh_revision=None):
def swh_latest_snapshot_revision(self, origin_id, prev_swh_revision=None):
"""We do not know this repository so no revision.
"""
......@@ -124,7 +126,7 @@ class SWHSvnLoaderUpdateNoStorage(TestSvnLoader, SWHSvnLoader):
consequence by loading the new revision
"""
def swh_latest_snapshot_revision(self, prev_swh_revision=None):
def swh_latest_snapshot_revision(self, origin_id, prev_swh_revision=None):
"""Avoid the storage persistence call and return the expected previous
revision for that repository.
......@@ -161,7 +163,7 @@ class SWHSvnLoaderUpdateHistoryAlteredNoStorage(TestSvnLoader, SWHSvnLoader):
history altered so we do not update it.
"""
def swh_latest_snapshot_revision(self, prev_swh_revision=None):
def swh_latest_snapshot_revision(self, origin_id, prev_swh_revision=None):
"""Avoid the storage persistence call and return the expected previous
revision for that repository.
......@@ -525,7 +527,7 @@ class SWHSvnLoaderUpdateLessRecentNoStorage(TestSvnLoader, SWHSvnLoader):
visit seen is less recent than a previous unfinished crawl.
"""
def swh_latest_snapshot_revision(self, prev_swh_revision=None):
def swh_latest_snapshot_revision(self, origin_id, prev_swh_revision=None):
"""Avoid the storage persistence call and return the expected previous
revision for that repository.
......@@ -652,6 +654,7 @@ class SWHSvnLoaderUpdateAndTestCornerCasesAboutEolITTest(BaseTestSvnLoader):
]
}
}
# when
self.loader.process_repository(
self.origin_visit,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment