From 734207ba5847a114d2ffe7d879be8ac54dbed4f6 Mon Sep 17 00:00:00 2001 From: Antoine Lambert <anlambert@softwareheritage.org> Date: Fri, 14 Oct 2022 13:39:38 +0200 Subject: [PATCH] rlog: Skip rlog entry with missing header in RlogConv.parse_rlog CVS rlog for a given module sent by the server is a concatenation of rlog entries. Each entry has a header containing the path to an RCS file plus other info. There are cases where an rlog entry header is empty, which makes the rlog parsing fail. So instead of stopping rlog parsing by raising an exception, prefer to skip that entry and process the next one. Closes T4629 --- swh/loader/cvs/rlog.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/swh/loader/cvs/rlog.py b/swh/loader/cvs/rlog.py index c3d39c3..80e9db8 100644 --- a/swh/loader/cvs/rlog.py +++ b/swh/loader/cvs/rlog.py @@ -46,6 +46,7 @@ import calendar from collections import defaultdict +import logging import re import string import time @@ -53,6 +54,8 @@ from typing import BinaryIO, Dict, List, NamedTuple, Optional, Tuple from swh.loader.cvs.cvs2gitdump.cvs2gitdump import ChangeSetKey +logger = logging.getLogger(__name__) + class revtuple(NamedTuple): number: str @@ -217,7 +220,9 @@ class RlogConv: if filename: path = filename elif not eof: - raise ValueError("No filename found in rlog header") + logger.warning( + "No filename found in rlog header, skipping associated entry" + ) while not eof: off = fp.tell() rev, logmsg, eof = _parse_log_entry(fp) @@ -230,7 +235,8 @@ class RlogConv: if rev: self.offsets[path][rev[0]] = off - self._process_rlog_revisions(path, taginfo, revisions, logmsgs) + if path: + self._process_rlog_revisions(path, taginfo, revisions, logmsgs) def getlog(self, fp: BinaryIO, path: bytes, rev: str) -> Optional[bytes]: off = self.offsets[path][rev] -- GitLab