diff --git a/PKG-INFO b/PKG-INFO index 378ac0f494dd962a7ca980c19da26eef0fb410e3..4cf3c64526c4b95bba733cb749fdcd3c76505946 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.loader.cvs -Version: 0.2.2 +Version: 0.3.0 Summary: Software Heritage CVS Loader Home-page: https://forge.softwareheritage.org/diffusion/swh-loader-cvs Author: Software Heritage developers diff --git a/pytest.ini b/pytest.ini index 3c9dea19461053b28da88b45f9472c5742e7b242..378d23aa5b1dd36f18d162e09174710497644115 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,3 +3,5 @@ norecursedirs = build docs .* markers = fs: execute tests that write to the filesystem + +asyncio_mode = strict diff --git a/requirements.txt b/requirements.txt index 1b5e575c97bfeb099014d9d321db162b9837cdbe..481a2139bd1ac86efea513d48b234413080461a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,6 @@ # should match https://pypi.python.org/pypi names. For the full spec or # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html +sentry-sdk tenacity + diff --git a/swh.loader.cvs.egg-info/PKG-INFO b/swh.loader.cvs.egg-info/PKG-INFO index 378ac0f494dd962a7ca980c19da26eef0fb410e3..4cf3c64526c4b95bba733cb749fdcd3c76505946 100644 --- a/swh.loader.cvs.egg-info/PKG-INFO +++ b/swh.loader.cvs.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.loader.cvs -Version: 0.2.2 +Version: 0.3.0 Summary: Software Heritage CVS Loader Home-page: https://forge.softwareheritage.org/diffusion/swh-loader-cvs Author: Software Heritage developers diff --git a/swh.loader.cvs.egg-info/requires.txt b/swh.loader.cvs.egg-info/requires.txt index cb67ee8332dbd6065446e7dcd9dbb41e569f217b..e131fa96b20f2fe836e31cd1edc63805ceb62658 100644 --- a/swh.loader.cvs.egg-info/requires.txt +++ b/swh.loader.cvs.egg-info/requires.txt @@ -1,3 +1,4 @@ +sentry-sdk tenacity swh.core[http]>=0.3 swh.storage>=0.11.3 diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py index fea1504f944e5306f55a62f68f3504b2d2a8c275..a06ff0abd1bcbf8652ff390c21303937768d479b 100644 --- a/swh/loader/cvs/loader.py +++ b/swh/loader/cvs/loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 The Software Heritage developers +# Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -15,6 +15,7 @@ import tempfile import time from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Sequence, Tuple, cast +import sentry_sdk from tenacity import retry from tenacity.retry import retry_if_exception_type from tenacity.stop import stop_after_attempt @@ -141,7 +142,7 @@ class CvsLoader(BaseLoader): else: parents = () revision = self.build_swh_revision(k, logmsg, swh_dir.hash, parents) - self.log.info("SWH revision ID: %s", hashutil.hash_to_hex(revision.id)) + self.log.debug("SWH revision ID: %s", hashutil.hash_to_hex(revision.id)) self._last_revision = revision return (revision, swh_dir) @@ -168,7 +169,7 @@ class CvsLoader(BaseLoader): wtpath = os.path.join(self.tempdir_path, path) if not self.file_path_is_safe(wtpath): raise BadPathException(f"unsafe path found in RCS file: {f.path}") - self.log.info("rev %s state %s file %s", f.rev, f.state, f.path) + self.log.debug("rev %s state %s file %s", f.rev, f.state, f.path) if f.state == "dead": # remove this file from work tree try: @@ -217,7 +218,7 @@ class CvsLoader(BaseLoader): wtpath = os.path.join(self.tempdir_path, path) if not self.file_path_is_safe(wtpath): raise BadPathException(f"unsafe path found in cvs rlog output: {f.path}") - self.log.info("rev %s state %s file %s", f.rev, f.state, f.path) + self.log.debug("rev %s state %s file %s", f.rev, f.state, f.path) if f.state == "dead": # remove this file from work tree try: @@ -254,7 +255,7 @@ class CvsLoader(BaseLoader): """ for k in cvs_changesets: tstr = time.strftime("%c", time.gmtime(k.max_time)) - self.log.info( + self.log.debug( "changeset from %s by %s on branch %s", tstr, k.author, k.branch ) logmsg: Optional[bytes] = b"" @@ -295,7 +296,7 @@ class CvsLoader(BaseLoader): ) def cleanup(self) -> None: - self.log.info("cleanup") + self.log.debug("cleanup") def configure_custom_id_keyword(self, cvsconfig): """Parse CVSROOT/config and look for a custom keyword definition. @@ -467,10 +468,10 @@ class CvsLoader(BaseLoader): # will need to be modified such that it spools the list of changesets # to disk instead. cvs = CvsConv(self.cvsroot_path, RcsKeywords(), False, CHANGESET_FUZZ_SEC) - self.log.info("Walking CVS module %s", self.cvs_module_name) + self.log.debug("Walking CVS module %s", self.cvs_module_name) cvs.walk(self.cvs_module_name) cvs_changesets = sorted(cvs.changesets) - self.log.info( + self.log.debug( "CVS changesets found in %s: %d", self.cvs_module_name, len(cvs_changesets), @@ -484,7 +485,7 @@ class CvsLoader(BaseLoader): self.cvsroot_path = os.path.dirname(url.path) self.cvsclient = CVSClient(url) cvsroot_path = os.path.dirname(url.path) - self.log.info( + self.log.debug( "Fetching CVS rlog from %s:%s/%s", url.host, cvsroot_path, @@ -537,7 +538,7 @@ class CvsLoader(BaseLoader): self.rlog.parse_rlog(cast(BinaryIO, fp)) self.rlog_file = cast(BinaryIO, fp) cvs_changesets = sorted(self.rlog.changesets) - self.log.info( + self.log.debug( "CVS changesets found for %s: %d", self.cvs_module_name, len(cvs_changesets), @@ -555,12 +556,15 @@ class CvsLoader(BaseLoader): except StopIteration: assert self._last_revision is not None self.snapshot = self.generate_and_load_snapshot(self._last_revision) - self.log.info("SWH snapshot ID: %s", hashutil.hash_to_hex(self.snapshot.id)) + self.log.debug( + "SWH snapshot ID: %s", hashutil.hash_to_hex(self.snapshot.id) + ) self.flush() self.loaded_snapshot_id = self.snapshot.id return False except Exception: self.log.exception("Exception in fetch_data:") + sentry_sdk.capture_exception() self._visit_status = "failed" return False # Stopping iteration self._contents, self._skipped_contents, self._directories, rev = data