From 98fe7fe800c21c841b6e8d2f0717c097990160f0 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 13 Oct 2022 14:57:07 +0200
Subject: [PATCH] Fix _sanitize_github_url removing suffixes too greedily

.rstrip() is not .removesuffix()
---
 swh/core/github/tests/test_github_utils.py | 6 ++++++
 swh/core/github/utils.py                   | 7 ++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/swh/core/github/tests/test_github_utils.py b/swh/core/github/tests/test_github_utils.py
index c7b70876..d3978f7c 100644
--- a/swh/core/github/tests/test_github_utils.py
+++ b/swh/core/github/tests/test_github_utils.py
@@ -16,6 +16,7 @@ from swh.core.github.utils import (
 )
 
 KNOWN_GH_REPO = "https://github.com/user/repo"
+KNOWN_GH_REPO2 = "https://github.com/user/reposit"
 
 
 @pytest.mark.parametrize(
@@ -26,6 +27,11 @@ KNOWN_GH_REPO = "https://github.com/user/repo"
         ("user/repo/", KNOWN_GH_REPO),
         ("user/repo", KNOWN_GH_REPO),
         ("user/repo/.git", KNOWN_GH_REPO),
+        ("user/reposit.git", KNOWN_GH_REPO2),
+        ("user/reposit.git/", KNOWN_GH_REPO2),
+        ("user/reposit/", KNOWN_GH_REPO2),
+        ("user/reposit", KNOWN_GH_REPO2),
+        ("user/reposit/.git", KNOWN_GH_REPO2),
         ("unknown/page", None),  # unknown gh origin returns None
         ("user/with/deps", None),  # url kind is not dealt with
     ],
diff --git a/swh/core/github/utils.py b/swh/core/github/utils.py
index 80ffa2bd..f10ed345 100644
--- a/swh/core/github/utils.py
+++ b/swh/core/github/utils.py
@@ -32,9 +32,14 @@ def _url_github_api(user_repo: str) -> str:
     return f"https://api.github.com/repos/{user_repo}"
 
 
+_SANITIZATION_RE = re.compile(r"^(.*?)/?(\.git)?/?$")
+
+
 def _sanitize_github_url(url: str) -> str:
     """Sanitize github url."""
-    return url.lower().rstrip("/").rstrip(".git").rstrip("/")
+    m = _SANITIZATION_RE.match(url.lower())
+    assert m is not None, url  # impossible, but mypy doesn't know it
+    return m.group(1)
 
 
 def get_canonical_github_origin_url(
-- 
GitLab