From 968ddef295559d690d5c595538396449c3a7c2bd Mon Sep 17 00:00:00 2001
From: Franck Bret <franck.bret@octobus.net>
Date: Thu, 12 Oct 2023 14:31:48 +0200
Subject: [PATCH] Improve registry repository management

Ensure the registry path does not exist before cloning the repository.
---
 swh/lister/julia/lister.py            |  8 ++++----
 swh/lister/julia/tests/test_lister.py | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/swh/lister/julia/lister.py b/swh/lister/julia/lister.py
index fb9211f0..214cbcef 100644
--- a/swh/lister/julia/lister.py
+++ b/swh/lister/julia/lister.py
@@ -33,7 +33,7 @@ class JuliaLister(StatelessLister[JuliaListerPage]):
     REPO_URL = (
         "https://github.com/JuliaRegistries/General.git"  # Julia General Registry
     )
-    REPO_PATH = Path(tempfile.mkdtemp("General"))
+    REPO_PATH = Path(tempfile.mkdtemp(), "General")
     REGISTRY_PATH = REPO_PATH / "Registry.toml"
 
     def __init__(
@@ -58,10 +58,10 @@ class JuliaLister(StatelessLister[JuliaListerPage]):
 
     def get_registry_repository(self) -> None:
         """Get Julia General Registry Git repository up to date on disk"""
-        if self.REPO_PATH.exists():
-            porcelain.pull(self.REPO_PATH, remote_location=self.url)
-        else:
+        try:
             porcelain.clone(source=self.url, target=self.REPO_PATH)
+        except FileExistsError:
+            porcelain.pull(self.REPO_PATH, remote_location=self.url)
 
     def get_pages(self) -> Iterator[JuliaListerPage]:
         """Yield an iterator which returns 'page'
diff --git a/swh/lister/julia/tests/test_lister.py b/swh/lister/julia/tests/test_lister.py
index f67b0bf8..6e5d2ea9 100644
--- a/swh/lister/julia/tests/test_lister.py
+++ b/swh/lister/julia/tests/test_lister.py
@@ -14,6 +14,24 @@ expected_origins = [
 ]
 
 
+def test_julia_get_registry_repository(datadir, tmp_path, swh_scheduler):
+    archive_path = Path(datadir, "fake-julia-registry-repository.tar.gz")
+    repo_url = prepare_repository_from_archive(archive_path, "General", tmp_path)
+
+    lister = JuliaLister(url=repo_url, scheduler=swh_scheduler)
+    assert not lister.REPO_PATH.exists()
+
+    lister.get_registry_repository()
+    assert lister.REPO_PATH.exists()
+    # ensure get_registry_repository is idempotent
+    lister.get_registry_repository()
+    assert lister.REPO_PATH.exists()
+
+    # ensure the repository is deleted once the lister has run
+    lister.run()
+    assert not lister.REPO_PATH.exists()
+
+
 def test_julia_lister(datadir, tmp_path, swh_scheduler):
     archive_path = Path(datadir, "fake-julia-registry-repository.tar.gz")
     repo_url = prepare_repository_from_archive(archive_path, "General", tmp_path)
-- 
GitLab