From 734207ba5847a114d2ffe7d879be8ac54dbed4f6 Mon Sep 17 00:00:00 2001
From: Antoine Lambert <anlambert@softwareheritage.org>
Date: Fri, 14 Oct 2022 13:39:38 +0200
Subject: [PATCH] rlog: Skip rlog entry with missing header in
 RlogConv.parse_rlog

The CVS rlog sent by the server for a given module is a concatenation
of rlog entries. Each entry has a header containing the path to an
RCS file plus other info.

There are cases where an rlog entry header is empty, which makes the
rlog parsing fail.

So instead of stopping rlog parsing by raising an exception, prefer
to skip that entry and process the next one.

Closes T4629
---
 swh/loader/cvs/rlog.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/swh/loader/cvs/rlog.py b/swh/loader/cvs/rlog.py
index c3d39c3..80e9db8 100644
--- a/swh/loader/cvs/rlog.py
+++ b/swh/loader/cvs/rlog.py
@@ -46,6 +46,7 @@
 
 import calendar
 from collections import defaultdict
+import logging
 import re
 import string
 import time
@@ -53,6 +54,8 @@ from typing import BinaryIO, Dict, List, NamedTuple, Optional, Tuple
 
 from swh.loader.cvs.cvs2gitdump.cvs2gitdump import ChangeSetKey
 
+logger = logging.getLogger(__name__)
+
 
 class revtuple(NamedTuple):
     number: str
@@ -217,7 +220,9 @@ class RlogConv:
             if filename:
                 path = filename
             elif not eof:
-                raise ValueError("No filename found in rlog header")
+                logger.warning(
+                    "No filename found in rlog header, skipping associated entry"
+                )
             while not eof:
                 off = fp.tell()
                 rev, logmsg, eof = _parse_log_entry(fp)
@@ -230,7 +235,8 @@ class RlogConv:
                     if rev:
                         self.offsets[path][rev[0]] = off
 
-            self._process_rlog_revisions(path, taginfo, revisions, logmsgs)
+            if path:
+                self._process_rlog_revisions(path, taginfo, revisions, logmsgs)
 
     def getlog(self, fp: BinaryIO, path: bytes, rev: str) -> Optional[bytes]:
         off = self.offsets[path][rev]
-- 
GitLab