From 63a8a61cbc9837db5851c267a02838f886384a24 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <vlorentz@softwareheritage.org> Date: Thu, 12 Aug 2021 11:06:51 +0200 Subject: [PATCH] deposit: Update status_detail on loader failure So far, it was only updated on checker failure. However, many failures may happen while loading, and we had no way to report them to users so far (besides diving ourselves in the logs/Sentry) --- swh/loader/package/deposit/loader.py | 13 ++++++-- .../package/deposit/tests/test_deposit.py | 6 ++++ swh/loader/package/loader.py | 33 ++++++++++++------- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py index 002f78d4..a95a0a39 100644 --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -253,14 +253,18 @@ class DepositLoader(PackageLoader[DepositPackageInfo]): # Then usual loading return super().load() - def finalize_visit(self, status_visit: str, **kwargs) -> Dict[str, Any]: + def finalize_visit( + self, status_visit: str, errors: Optional[List[str]] = None, **kwargs + ) -> Dict[str, Any]: r = super().finalize_visit(status_visit=status_visit, **kwargs) success = status_visit == "full" # Update deposit status try: if not success: - self.client.status_update(self.deposit_id, status="failed") + self.client.status_update( + self.deposit_id, status="failed", errors=errors, + ) return r snapshot_id = hash_to_bytes(r["snapshot_id"]) @@ -358,6 +362,7 @@ class ApiClient: self, deposit_id: Union[int, str], status: str, + errors: Optional[List[str]] = None, revision_id: Optional[str] = None, directory_id: Optional[str] = None, snapshot_id: Optional[str] = None, @@ -368,7 +373,7 @@ class ApiClient: """ url = f"{self.base_url}/{deposit_id}/update/" - payload = {"status": status} + payload: Dict[str, Any] = {"status": status} if revision_id: payload["revision_id"] = revision_id if directory_id: @@ -377,5 +382,7 @@ class ApiClient: payload["snapshot_id"] = snapshot_id if origin_url: payload["origin_url"] = origin_url + if errors: + payload["status_detail"] = {"loading": errors} self.do("put", url, json=payload) diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py index 9a210ec2..dec39471 100644 --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -152,6 +152,12 @@ def test_deposit_loading_failure_to_retrieve_1_artifact( body = update_query.json() expected_body = { "status": "failed", + "status_detail": { + "loading": [ + "Failed to load branch HEAD for some-url-2: Fail to query " + "'https://deposit.softwareheritage.org/1/private/666/raw/'. Reason: 404" + ] + }, } assert body == expected_body diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py index 70761a55..f609a026 100644 --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -406,6 +406,7 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): status_visit: str, status_load: str, failed_branches: List[str], + errors: Optional[List[str]] = None, ) -> Dict[str, Any]: """Finalize the visit: @@ -529,6 +530,7 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): failed_branches=failed_branches, status_visit="failed", status_load="failed", + errors=[str(e)], ) load_exceptions: List[Exception] = [] @@ -536,21 +538,23 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): # Get the list of all version names try: versions = self.get_versions() - except NotFound: + except NotFound as e: return self.finalize_visit( snapshot=snapshot, visit=visit, failed_branches=failed_branches, status_visit="not_found", status_load="failed", + errors=[str(e)], ) - except Exception: + except Exception as e: return self.finalize_visit( snapshot=snapshot, visit=visit, failed_branches=failed_branches, status_visit="failed", status_load="failed", + errors=[str(e)], ) # Get the metadata of each version's package @@ -576,6 +580,7 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): tmp_revisions: Dict[str, List[Tuple[str, Sha1Git]]] = { version: [] for version in versions } + errors = [] for (version, branch_name, p_info) in packages_info: logger.debug("package_info: %s", p_info) @@ -601,10 +606,10 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): self.storage.clear_buffers() load_exceptions.append(e) sentry_sdk.capture_exception(e) - logger.exception( - "Failed loading branch %s for %s", branch_name, self.url - ) + error = f"Failed to load branch {branch_name} for {self.url}" + logger.exception(error) failed_branches.append(branch_name) + errors.append(f"{error}: {e}") continue if revision_id is None: @@ -633,6 +638,7 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): failed_branches=failed_branches, status_visit="failed", status_load="failed", + errors=errors, ) try: @@ -648,7 +654,9 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): ) self.storage.flush() except Exception as e: - logger.exception("Failed to build snapshot for origin %s", self.url) + error = f"Failed to build snapshot for origin {self.url}" + logger.exception(error) + errors.append(f"{error}: {e}") sentry_sdk.capture_exception(e) status_visit = "failed" status_load = "failed" @@ -658,9 +666,9 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): metadata_objects = self.build_extrinsic_snapshot_metadata(snapshot.id) self._load_metadata_objects(metadata_objects) except Exception as e: - logger.exception( - "Failed to load extrinsic snapshot metadata for %s", self.url - ) + error = f"Failed to load extrinsic snapshot metadata for {self.url}" + logger.exception(error) + errors.append(f"{error}: {e}") sentry_sdk.capture_exception(e) status_visit = "partial" status_load = "failed" @@ -669,9 +677,9 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): metadata_objects = self.build_extrinsic_origin_metadata() self._load_metadata_objects(metadata_objects) except Exception as e: - logger.exception( - "Failed to load extrinsic origin metadata for %s", self.url - ) + error = f"Failed to load extrinsic origin metadata for {self.url}" + logger.exception(error) + errors.append(f"{error}: {e}") sentry_sdk.capture_exception(e) status_visit = "partial" status_load = "failed" @@ -684,6 +692,7 @@ class PackageLoader(BaseLoader, Generic[TPackageInfo]): failed_branches=failed_branches, status_visit=status_visit, status_load=status_load, + errors=errors, ) def _load_directory( -- GitLab