Skip to content
Snippets Groups Projects
Commit d3d60421 authored by Antoine Lambert's avatar Antoine Lambert
Browse files

dumb: Handle missing or corrupted pack file

Some dumb git servers might reference a no longer existing pack file
while it is possible to fully load a repository without it.

So remove bogus pack file from the global packs list when encountering
such edge case and try to continue the loading anyway.

Related to T3618
parent 6b21441d
No related branches found
No related tags found
No related merge requests found
......@@ -8,10 +8,12 @@ from __future__ import annotations
from collections import defaultdict
import logging
import stat
import struct
from tempfile import SpooledTemporaryFile
from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, Set, cast
from dulwich.client import HttpGitClient
from dulwich.errors import NotGitRepository
from dulwich.objects import S_IFGITLINK, Commit, ShaFile, Tree
from dulwich.pack import Pack, PackData, PackIndex, load_pack_index_file
from urllib3.response import HTTPResponse
......@@ -177,10 +179,16 @@ class GitObjectsFetcher:
def _get_git_object(self, sha: bytes) -> ShaFile:
# try to get the object from a pack file first to avoid flooding
# git server with numerous HTTP requests
for pack in self.packs:
if sha in pack:
return pack[sha]
# fetch it from object/ directory otherwise
for pack in list(self.packs):
try:
if sha in pack:
return pack[sha]
except (NotGitRepository, struct.error):
# missing (dulwich http client raises NotGitRepository on 404)
# or invalid pack index/content, remove it from global packs list
logger.debug("A pack file is missing or its content is invalid")
self.packs.remove(pack)
# fetch it from objects/ directory otherwise
sha_hex = sha.decode()
object_path = f"objects/{sha_hex[:2]}/{sha_hex[2:]}"
return ShaFile.from_file(self._http_get(object_path))
......
......@@ -7,6 +7,7 @@ from functools import partial
from http.server import HTTPServer, SimpleHTTPRequestHandler
import os
import subprocess
from tempfile import SpooledTemporaryFile
from threading import Thread
from dulwich.errors import GitProtocolError, NotGitRepository, ObjectFormatException
......@@ -287,6 +288,34 @@ class TestDumbGitLoaderWithPack(DumbGitLoaderTestBase):
def setup_class(cls):
cls.with_pack_files = True
def test_load_with_missing_pack(self, mocker):
"""Some dumb git servers might reference a no longer existing pack file
while it is possible to load a repository without it.
"""
class GitObjectsFetcherMissingPack(dumb.GitObjectsFetcher):
def _http_get(self, path: str) -> SpooledTemporaryFile:
buffer = super()._http_get(path)
if path == "objects/info/packs":
# prepend a non existing pack to the returned packs list
packs = buffer.read().decode("utf-8")
buffer.seek(0)
buffer.write(
(
"P pack-a70762ba1a901af3a0e76de02fc3a99226842745.pack\n"
+ packs
).encode()
)
buffer.flush()
buffer.seek(0)
return buffer
mocker.patch.object(dumb, "GitObjectsFetcher", GitObjectsFetcherMissingPack)
res = self.loader.load()
assert res == {"status": "eventful"}
class TestDumbGitLoaderWithoutPack(DumbGitLoaderTestBase):
@classmethod
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment