diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py
index 671e40d4a180e5bc37c156dea25ebc4b609a255c..bc9e85258ceb960decb0d027cf1139678709cfee 100644
--- a/swh/lister/nixguix/lister.py
+++ b/swh/lister/nixguix/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2023  The Software Heritage developers
+# Copyright (C) 2020-2024  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -20,18 +20,21 @@ import binascii
 from dataclasses import dataclass
 from enum import Enum
 import logging
-from pathlib import Path
 import random
 import re
 from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
-from urllib.parse import parse_qsl, urlparse
+from urllib.parse import urlparse
 
 import requests
-from requests.exceptions import ConnectionError, InvalidSchema, SSLError
 
 from swh.core.tarball import MIMETYPE_TO_ARCHIVE_FORMAT
-from swh.lister import TARBALL_EXTENSIONS
 from swh.lister.pattern import CredentialsType, StatelessLister
+from swh.lister.utils import (
+    ArtifactNatureMistyped,
+    ArtifactNatureUndetected,
+    is_tarball,
+    url_contains_tarball_filename,
+)
 from swh.scheduler.model import ListedOrigin
 
 logger = logging.getLogger(__name__)
@@ -52,29 +55,6 @@ DEFAULT_EXTENSIONS_TO_IGNORE = [
 ]
 
 
-class ArtifactNatureUndetected(ValueError):
-    """Raised when a remote artifact's nature (tarball, file) cannot be detected."""
-
-    pass
-
-
-class ArtifactNatureMistyped(ValueError):
-    """Raised when a remote artifact is neither a tarball nor a file.
-
-    Error of this type are' probably a misconfiguration in the manifest generation that
-    badly typed a vcs repository.
-
-    """
-
-    pass
-
-
-class ArtifactWithoutExtension(ValueError):
-    """Raised when an artifact nature cannot be determined by its name."""
-
-    pass
-
-
 class ChecksumLayout(Enum):
     """The possible artifact types listed out of the manifest."""
 
@@ -147,163 +127,6 @@ POSSIBLE_TARBALL_MIMETYPES = tuple(MIMETYPE_TO_ARCHIVE_FORMAT.keys())
 PATTERN_VERSION = re.compile(r"(v*[0-9]+[.])([0-9]+[.]*)+")
 
 
-def url_contains_tarball_filename(
-    urlparsed, extensions: List[str], raise_when_no_extension: bool = True
-) -> bool:
-    """Determine whether urlparsed contains a tarball filename ending with one of the
-    extensions passed as parameter, path parts and query parameters are checked.
-
-    This also account for the edge case of a filename with only a version as name (so no
-    extension in the end.)
-
-    Raises:
-        ArtifactWithoutExtension in case no extension is available and
-        raise_when_no_extension is True (the default)
-
-    """
-    paths = [Path(p) for (_, p) in [("_", urlparsed.path)] + parse_qsl(urlparsed.query)]
-    match = any(
-        path_part.endswith(tuple(extensions))
-        for path in paths
-        for path_part in path.parts
-    )
-    if match:
-        return match
-    if raise_when_no_extension and not any(path.suffix != "" for path in paths):
-        raise ArtifactWithoutExtension
-    # Some false negative can happen (e.g. https://<netloc>/path/0.1.5)), so make sure
-    # to catch those
-    name = Path(urlparsed.path).name
-    if not PATTERN_VERSION.match(name):
-        return match
-    if raise_when_no_extension:
-        raise ArtifactWithoutExtension
-    return False
-
-
-def is_tarball(
-    urls: List[str],
-    request: Optional[Any] = None,
-) -> Tuple[bool, str]:
-    """Determine whether a list of files actually are tarball or simple files.
-
-    This iterates over the list of urls provided to detect the artifact's nature. When
-    this cannot be answered simply out of the url and ``request`` is provided, this
-    executes a HTTP `HEAD` query on the url to determine the information. If request is
-    not provided, this raises an ArtifactNatureUndetected exception.
-
-    If, at the end of the iteration on the urls, no detection could be deduced, this
-    raises an ArtifactNatureUndetected.
-
-    Args:
-        urls: name of the remote files to check for artifact nature.
-        request: (Optional) Request object allowing http calls. If not provided and
-            naive check cannot detect anything, this raises ArtifactNatureUndetected.
-
-    Raises:
-        ArtifactNatureUndetected when the artifact's nature cannot be detected out
-            of its urls
-        ArtifactNatureMistyped when the artifact is not a tarball nor a file. It's up to
-            the caller to do what's right with it.
-
-    Returns: A tuple (bool, url). The boolean represents whether the url is an archive
-        or not. The second parameter is the actual url once the head request is issued
-        as a fallback of not finding out whether the urls are tarballs or not.
-
-    """
-
-    def _is_tarball(url):
-        """Determine out of an extension whether url is a tarball.
-
-        Raises:
-            ArtifactWithoutExtension in case no extension is available
-
-        """
-        urlparsed = urlparse(url)
-        if urlparsed.scheme not in ("http", "https", "ftp"):
-            raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'")
-        return url_contains_tarball_filename(urlparsed, TARBALL_EXTENSIONS)
-
-    # Check all urls and as soon as an url allows the nature detection, this stops.
-    exceptions_to_raise = []
-    for url in urls:
-        try:
-            return _is_tarball(url), urls[0]
-        except ArtifactWithoutExtension:
-            if request is None:
-                exc = ArtifactNatureUndetected(
-                    f"Cannot determine artifact type from url <{url}>"
-                )
-                exceptions_to_raise.append(exc)
-                continue
-
-            logger.warning(
-                "Cannot detect extension for <%s>. Fallback to http head query",
-                url,
-            )
-
-            try:
-                response = request.head(url)
-            except (InvalidSchema, SSLError, ConnectionError):
-                exc = ArtifactNatureUndetected(
-                    f"Cannot determine artifact type from url <{url}>"
-                )
-                exceptions_to_raise.append(exc)
-                continue
-
-            if not response.ok or response.status_code == 404:
-                exc = ArtifactNatureUndetected(
-                    f"Cannot determine artifact type from url <{url}>"
-                )
-                exceptions_to_raise.append(exc)
-                continue
-
-            location = response.headers.get("Location")
-            if location:  # It's not always present
-                logger.debug("Location: %s", location)
-                try:
-                    return _is_tarball(location), url
-                except ArtifactWithoutExtension:
-                    logger.warning(
-                        "Still cannot detect extension through location <%s>...",
-                        url,
-                    )
-
-            origin = urls[0]
-
-            content_type = response.headers.get("Content-Type")
-            if content_type:
-                logger.debug("Content-Type: %s", content_type)
-                if content_type == "application/json":
-                    return False, origin
-                return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), origin
-
-            content_disposition = response.headers.get("Content-Disposition")
-            if content_disposition:
-                logger.debug("Content-Disposition: %s", content_disposition)
-                if "filename=" in content_disposition:
-                    fields = content_disposition.split("; ")
-                    for field in fields:
-                        if "filename=" in field:
-                            _, filename = field.split("filename=")
-                            break
-
-                    return (
-                        url_contains_tarball_filename(
-                            urlparse(filename),
-                            TARBALL_EXTENSIONS,
-                            raise_when_no_extension=False,
-                        ),
-                        origin,
-                    )
-
-    if len(exceptions_to_raise) > 0:
-        raise exceptions_to_raise[0]
-    raise ArtifactNatureUndetected(
-        f"Cannot determine artifact type from url <{urls[0]}>"
-    )
-
-
 VCS_KEYS_MAPPING = {
     "git": {
         "ref": "git_ref",
diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py
index ab976f28c0ee028fa3af65cbb87645b9060d9796..54ac3318d3014f5563e1001c400c2e0ae0e7780c 100644
--- a/swh/lister/nixguix/tests/test_lister.py
+++ b/swh/lister/nixguix/tests/test_lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2022 The Software Heritage developers
+# Copyright (C) 2022-2024 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -19,14 +19,16 @@ from swh.lister.nixguix.lister import (
     DEFAULT_EXTENSIONS_TO_IGNORE,
     POSSIBLE_TARBALL_MIMETYPES,
     VCS_ARTIFACT_TYPE_TO_VISIT_TYPE,
-    ArtifactNatureMistyped,
-    ArtifactNatureUndetected,
-    ArtifactWithoutExtension,
     NixGuixLister,
     is_tarball,
     url_contains_tarball_filename,
 )
 from swh.lister.pattern import ListerStats
+from swh.lister.utils import (
+    ArtifactNatureMistyped,
+    ArtifactNatureUndetected,
+    ArtifactWithoutExtension,
+)
 
 logger = logging.getLogger(__name__)
 
diff --git a/swh/lister/utils.py b/swh/lister/utils.py
index 60cfc933831579ba38c83cf86894599b159d961c..73262f7228080ef2c07ee1b74e3ec674c57a740f 100644
--- a/swh/lister/utils.py
+++ b/swh/lister/utils.py
@@ -1,9 +1,20 @@
-# Copyright (C) 2018-2023 the Software Heritage developers
+# Copyright (C) 2018-2024 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from typing import Iterator, Optional, Tuple
-import urllib.parse
+
+import logging
+from pathlib import Path
+import re
+from typing import Any, Iterator, List, Optional, Tuple
+from urllib.parse import parse_qsl, urlparse
+
+from requests.exceptions import ConnectionError, InvalidSchema, SSLError
+
+from swh.core.tarball import MIMETYPE_TO_ARCHIVE_FORMAT
+from swh.lister import TARBALL_EXTENSIONS
+
+logger = logging.getLogger(__name__)
 
 
 def split_range(total_pages: int, nb_pages: int) -> Iterator[Tuple[int, int]]:
@@ -65,7 +76,7 @@ def is_valid_origin_url(url: Optional[str]) -> bool:
         # Empty or None
         return False
 
-    parsed = urllib.parse.urlparse(url)
+    parsed = urlparse(url)
     if not parsed.netloc:
         # Is parsed as a relative URL
         return False
@@ -75,3 +86,190 @@ def is_valid_origin_url(url: Optional[str]) -> bool:
         return False
 
     return True
+
+
+class ArtifactNatureUndetected(ValueError):
+    """Raised when a remote artifact's nature (tarball, file) cannot be detected."""
+
+    pass
+
+
+class ArtifactNatureMistyped(ValueError):
+    """Raised when a remote artifact is neither a tarball nor a file.
+
+    Errors of this type are probably a misconfiguration in the manifest generation that
+    badly typed a vcs repository.
+
+    """
+
+    pass
+
+
+class ArtifactWithoutExtension(ValueError):
+    """Raised when an artifact nature cannot be determined by its name."""
+
+    pass
+
+
+# Rough approximation of what we can find of mimetypes for tarballs "out there"
+POSSIBLE_TARBALL_MIMETYPES = tuple(MIMETYPE_TO_ARCHIVE_FORMAT.keys())
+
+
+PATTERN_VERSION = re.compile(r"(v*[0-9]+[.])([0-9]+[.]*)+")
+
+
+def url_contains_tarball_filename(
+    urlparsed, extensions: List[str], raise_when_no_extension: bool = True
+) -> bool:
+    """Determine whether urlparsed contains a tarball filename ending with one of the
+    extensions passed as parameter, path parts and query parameters are checked.
+
+    This also accounts for the edge case of a filename with only a version as name (so
+    no extension in the end.)
+
+    Raises:
+        ArtifactWithoutExtension in case no extension is available and
+        raise_when_no_extension is True (the default)
+
+    """
+    paths = [Path(p) for (_, p) in [("_", urlparsed.path)] + parse_qsl(urlparsed.query)]
+    match = any(
+        path_part.endswith(tuple(extensions))
+        for path in paths
+        for path_part in path.parts
+    )
+    if match:
+        return match
+    if raise_when_no_extension and not any(path.suffix != "" for path in paths):
+        raise ArtifactWithoutExtension
+    # Some false negatives can happen (e.g. https://<netloc>/path/0.1.5), so make sure
+    # to catch those
+    name = Path(urlparsed.path).name
+    if not PATTERN_VERSION.match(name):
+        return match
+    if raise_when_no_extension:
+        raise ArtifactWithoutExtension
+    return False
+
+
+def is_tarball(
+    urls: List[str],
+    request: Optional[Any] = None,
+) -> Tuple[bool, str]:
+    """Determine whether a list of files actually are tarballs or simple files.
+
+    This iterates over the list of urls provided to detect the artifact's nature. When
+    this cannot be answered simply out of the url and ``request`` is provided, this
+    executes a HTTP `HEAD` query on the url to determine the information. If request is
+    not provided, this raises an ArtifactNatureUndetected exception.
+
+    If, at the end of the iteration on the urls, no detection could be deduced, this
+    raises an ArtifactNatureUndetected.
+
+    Args:
+        urls: name of the remote files to check for artifact nature.
+        request: (Optional) Request object allowing http calls. If not provided and
+            naive check cannot detect anything, this raises ArtifactNatureUndetected.
+
+    Raises:
+        ArtifactNatureUndetected when the artifact's nature cannot be detected out
+            of its urls
+        ArtifactNatureMistyped when the artifact is neither a tarball nor a file. It's
+            up to the caller to do what's right with it.
+
+    Returns: A tuple (bool, url). The boolean represents whether the url is an archive
+        or not. The second parameter is the actual url once the head request is issued
+        as a fallback of not finding out whether the urls are tarballs or not.
+
+    """
+
+    def _is_tarball(url):
+        """Determine out of an extension whether url is a tarball.
+
+        Raises:
+            ArtifactWithoutExtension in case no extension is available
+
+        """
+        urlparsed = urlparse(url)
+        if urlparsed.scheme not in ("http", "https", "ftp"):
+            raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'")
+        return url_contains_tarball_filename(urlparsed, TARBALL_EXTENSIONS)
+
+    # Check all urls and as soon as an url allows the nature detection, this stops.
+    exceptions_to_raise = []
+    for url in urls:
+        try:
+            return _is_tarball(url), urls[0]
+        except ArtifactWithoutExtension:
+            if request is None:
+                exc = ArtifactNatureUndetected(
+                    f"Cannot determine artifact type from url <{url}>"
+                )
+                exceptions_to_raise.append(exc)
+                continue
+
+            logger.warning(
+                "Cannot detect extension for <%s>. Fallback to http head query",
+                url,
+            )
+
+            try:
+                response = request.head(url)
+            except (InvalidSchema, SSLError, ConnectionError):
+                exc = ArtifactNatureUndetected(
+                    f"Cannot determine artifact type from url <{url}>"
+                )
+                exceptions_to_raise.append(exc)
+                continue
+
+            if not response.ok or response.status_code == 404:
+                exc = ArtifactNatureUndetected(
+                    f"Cannot determine artifact type from url <{url}>"
+                )
+                exceptions_to_raise.append(exc)
+                continue
+
+            location = response.headers.get("Location")
+            if location:  # It's not always present
+                logger.debug("Location: %s", location)
+                try:
+                    return _is_tarball(location), url
+                except ArtifactWithoutExtension:
+                    logger.warning(
+                        "Still cannot detect extension through location <%s>...",
+                        url,
+                    )
+
+            origin = urls[0]
+
+            content_type = response.headers.get("Content-Type")
+            if content_type:
+                logger.debug("Content-Type: %s", content_type)
+                if content_type == "application/json":
+                    return False, origin
+                return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), origin
+
+            content_disposition = response.headers.get("Content-Disposition")
+            if content_disposition:
+                logger.debug("Content-Disposition: %s", content_disposition)
+                if "filename=" in content_disposition:
+                    fields = content_disposition.split("; ")
+                    for field in fields:
+                        if "filename=" in field:
+                            _, filename = field.split("filename=")
+                            break
+
+                    return (
+                        url_contains_tarball_filename(
+                            urlparse(filename),
+                            TARBALL_EXTENSIONS,
+                            raise_when_no_extension=False,
+                        ),
+                        origin,
+                    )
+
+    if len(exceptions_to_raise) > 0:
+        raise exceptions_to_raise[0]
+    raise ArtifactNatureUndetected(
+        f"Cannot determine artifact type from url <{urls[0]}>"
+    )