From 98fe7fe800c21c841b6e8d2f0717c097990160f0 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 13 Oct 2022 14:57:07 +0200
Subject: [PATCH] Fix _sanitize_github_url removing suffixes too greedily

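.rstrip() is not .removesuffix(): rstrip(".git") strips every trailing
character belonging to the set {".", "g", "i", "t"} rather than the
literal ".git" suffix, so "user/reposit" was wrongly truncated to
"user/repos". Use an anchored regular expression to drop only an
optional trailing "/", ".git" and "/".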
---
 swh/core/github/tests/test_github_utils.py | 6 ++++++
 swh/core/github/utils.py                   | 7 ++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/swh/core/github/tests/test_github_utils.py b/swh/core/github/tests/test_github_utils.py
index c7b70876..d3978f7c 100644
--- a/swh/core/github/tests/test_github_utils.py
+++ b/swh/core/github/tests/test_github_utils.py
@@ -16,6 +16,7 @@ from swh.core.github.utils import (
 )
 
 KNOWN_GH_REPO = "https://github.com/user/repo"
+KNOWN_GH_REPO2 = "https://github.com/user/reposit"
 
 
 @pytest.mark.parametrize(
@@ -26,6 +27,11 @@ KNOWN_GH_REPO = "https://github.com/user/repo"
         ("user/repo/", KNOWN_GH_REPO),
         ("user/repo", KNOWN_GH_REPO),
         ("user/repo/.git", KNOWN_GH_REPO),
+        ("user/reposit.git", KNOWN_GH_REPO2),
+        ("user/reposit.git/", KNOWN_GH_REPO2),
+        ("user/reposit/", KNOWN_GH_REPO2),
+        ("user/reposit", KNOWN_GH_REPO2),
+        ("user/reposit/.git", KNOWN_GH_REPO2),
         ("unknown/page", None),  # unknown gh origin returns None
         ("user/with/deps", None),  # url kind is not dealt with
     ],
diff --git a/swh/core/github/utils.py b/swh/core/github/utils.py
index 80ffa2bd..f10ed345 100644
--- a/swh/core/github/utils.py
+++ b/swh/core/github/utils.py
@@ -32,9 +32,14 @@ def _url_github_api(user_repo: str) -> str:
     return f"https://api.github.com/repos/{user_repo}"
 
 
+_SANITIZATION_RE = re.compile(r"^(.*?)/?(\.git)?/?$")
+
+
 def _sanitize_github_url(url: str) -> str:
     """Sanitize github url."""
-    return url.lower().rstrip("/").rstrip(".git").rstrip("/")
+    m = _SANITIZATION_RE.match(url.lower())
+    assert m is not None, url  # matches any newline-free input; narrows type for mypy
+    return m.group(1)
 
 
 def get_canonical_github_origin_url(
-- 
GitLab