diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 59cb0de58a27d254e4998563682dec289aa72a7a..7fc590b5c95596df1a82e98df1b62e5cc7621f3c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
     hooks:
       - id: codespell
         name: Check source code spelling
-        args: [-L aks]
+        args: [-L aks, -L crate]
         stages: [commit]
 
   - repo: local
diff --git a/docs/software-origins-support.yml b/docs/software-origins-support.yml
index f2888719c1108b8f6c5a9605562c137c42e22bbc..c3f8a7664fad8a7efa4f36ee771de94e78929325 100644
--- a/docs/software-origins-support.yml
+++ b/docs/software-origins-support.yml
@@ -4,9 +4,13 @@ forges:
     lister:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/meta/-/issues/4233
+      supports_last_update: true
     loader:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/meta/-/issues/4233
+    metadata:
+      intrinsic: collected
+      extrinsic: none
     grant: sloan-hashbang-2022
     developer: hashbang
 
@@ -17,15 +21,22 @@ forges:
     loader:
       status: prod
       id_in_swh_web: tar
+    metadata:
+      intrinsic: none
+      extrinsic: none
 
   aur:
     name: AUR
     lister:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/meta/-/issues/4466
+      supports_last_update: true
     loader:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/meta/-/issues/4466
+    metadata:
+      intrinsic: collected
+      extrinsic: none
     grant: sloan-hashbang-2022
     developer: hashbang
 
@@ -37,6 +48,9 @@ forges:
       status: prod
       source: https://gitlab.softwareheritage.org/swh/devel/swh-loader-bzr/
       package_name: swh.loader.bzr
+    metadata:
+      intrinsic: none
+      extrinsic: none
     grant: sloan-octobus-2021
     developer: octobus
 
@@ -44,15 +58,23 @@ forges:
     name: Bitbucket
     lister:
       status: prod
+      supports_last_update: true
     loader:
       status: N/A
+    metadata:
+      intrinsic: none
+      extrinsic: not collected
 
   bower:
     name: Bower
     lister:
       status: staging
+      supports_last_update: false
     loader:
       status: N/A
+    metadata:
+      intrinsic: none
+      extrinsic: not collected
     grant: nlnet-octobus-2022
     developer: octobus
 
@@ -60,17 +82,25 @@ forges:
     name: CGit
     lister:
       status: prod
+      supports_last_update: true
     loader:
       status: N/A
+    metadata:
+      intrinsic: none
+      extrinsic: not collected
 
   conda:
     name: Conda
     lister:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/devel/swh-lister/-/issues/4547
+      supports_last_update: true
     loader:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/devel/swh-loader-core/-/issues/4579
+    metadata:
+      intrinsic: collected
+      extrinsic: not collected
     grant: nlnet-octobus-2022
     developer: octobus
 
@@ -78,9 +108,14 @@ forges:
     name: CPAN
     lister:
       status: dev
-      issue: https://gitlab.softwareheritage.org/swh/devel/swh-lister/-/issues/2833
+      issue: https://gitlab.softwareheritage.org/swh/devel/swh-lister/-/issues/4520
+      supports_last_update: true
     loader:
-      status: N/A
+      status: dev
+      issue: https://gitlab.softwareheritage.org/swh/devel/swh-lister/-/issues/2833
+    metadata:
+      intrinsic: collected
+      extrinsic: not collected
     grant: nlnet-octobus-2022
     developer: octobus
 
@@ -89,17 +124,25 @@ forges:
     lister:
       status: prod
       id_in_swh_web: CRAN
+      supports_last_update: true
     loader:
       status: prod
+    metadata:
+      intrinsic: collected
+      extrinsic: none
 
   crates:
     name: Crates
     lister:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/devel/swh-lister/-/issues/1424
+      supports_last_update: true
     loader:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/meta/-/issues/4104
+    metadata:
+      intrinsic: collected
+      extrinsic: not collected
     grant: sloan-hashbang-2022
     developer: hashbang
 
@@ -111,6 +154,9 @@ forges:
       status: prod
       source: https://gitlab.softwareheritage.org/swh/devel/swh-loader-cvs/
       package_name: swh.loader.cvs
+    metadata:
+      intrinsic: none
+      extrinsic: none
     grant: sloan-stsp-cvs
     developer: stsp
 
@@ -118,9 +164,13 @@ forges:
     name: Debian
     lister:
       status: prod
+      supports_last_update: true
     loader:
       status: prod
       id_in_swh_web: deb
+    metadata:
+      intrinsic: collected
+      extrinsic: not collected
 
   deposit:
     name: Deposit
@@ -128,14 +178,21 @@ forges:
       status: N/A
     loader:
       status: prod
+    metadata:
+      intrinsic: indexed
+      extrinsic: indexed
 
   gitea:
     name: Gitea
     notes: "Reuses the Gogs lister"
     lister:
       status: prod
+      supports_last_update: true
     loader:
       status: N/A
+    metadata:
+      intrinsic: none
+      extrinsic: indexed
 
   git:
     name: Git
@@ -145,44 +202,67 @@ forges:
       status: prod
       source: https://gitlab.softwareheritage.org/swh/devel/swh-loader-git/
       package_name: swh.loader.git
+    metadata:
+      intrinsic: none
+      extrinsic: none
 
   github:
     name: GitHub
     lister:
       status: prod
+      supports_last_update: true
     loader:
       status: N/A
+    metadata:
+      intrinsic: none
+      extrinsic: indexed
 
   gitlab:
     name: GitLab
     notes: "Also supports `Heptapod <https://heptapod.net/>`_"
     lister:
       status: prod
+      supports_last_update: true
     loader:
       status: N/A
+    metadata:
+      intrinsic: none
+      extrinsic: not collected
 
   gnu:
     name: GNU
     lister:
       status: prod
       id_in_swh_web: GNU
+      supports_last_update: true
     loader:
       status: N/A
+    metadata:
+      intrinsic: collected
+      extrinsic: none
 
   gogs:
     name: Gogs
     lister:
-      status: dev
+      status: prod
       issue: https://gitlab.softwareheritage.org/swh/devel/swh-lister/-/issues/1721
+      supports_last_update: true
     loader:
       status: N/A
+    metadata:
+      intrinsic: none
+      extrinsic: not collected
 
   golang:
     name: Golang
     lister:
-      status: staging
+      status: prod
+      supports_last_update: true
     loader:
-      status: staging
+      status: prod
+    metadata:
+      intrinsic: none
+      extrinsic: none
     grant: nlnet-octobus-2022
     developer: octobus
 
@@ -191,9 +271,13 @@ forges:
     lister:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/meta/-/issues/4494
+      supports_last_update: true
     loader:
       status: dev
       issue: https://gitlab.softwareheritage.org/swh/meta/-/issues/4494
+    metadata:
+      intrinsic: collected
+      extrinsic: none
     grant: nlnet-octobus-2022
     developer: octobus
 
@@ -201,8 +285,12 @@ forges:
     name: Launchpad
     lister:
       status: prod
+      supports_last_update: true
     loader:
       status: N/A
+    metadata:
+      intrinsic: none
+      extrinsic: not collected
 
   maven:
     name: Maven
@@ -210,6 +298,9 @@ forges:
       status: prod
     loader:
       status: prod
+    metadata:
+      intrinsic: collected
+      extrinsic: collected
     grant: sloan-castalia-maven
     developer: castalia
 
@@ -222,6 +313,9 @@ forges:
       id_in_swh_web: hg
       source: https://gitlab.softwareheritage.org/swh/devel/swh-loader-mercurial/
       package_name: swh.loader.mercurial
+    metadata:
+      intrinsic: none
+      extrinsic: none
     grant: sloan-2020
     developer: octobus
 
diff --git a/docs/user/software-origins/arch.rst b/docs/user/software-origins/arch.rst
index 07d9a223555874ff1a2459a00017d80e887e2bf1..e158419360fde5dabde52afa4879f0a7ee733464 100644
--- a/docs/user/software-origins/arch.rst
+++ b/docs/user/software-origins/arch.rst
@@ -3,7 +3,26 @@
 Archlinux
 =========
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/arch_status.inc
+
+This page documents how |swh| archives source packages from the
+`Archlinux <https://archlinux.org/>`_ and `Archlinux ARM <https://archlinuxarm.org>`_
+distributions.
+The `AUR (Archlinux User Repository) <https://aur.archlinux.org/>`_ is
+:ref:`described in its own dedicated documentation <user-software-origins-aur>`,
+as it uses a very different packaging architecture.
+
+|swh| currently has a lister and a loader for Archlinux packages, but they list and load
+binary packages; and need to be modified to list and load source packages instead.
+
+Origin URLs match those of the canonical web pages displaying information about each
+package. For example: https://archlinux.org/packages/core/x86_64/coreutils/
+and https://aur.archlinux.org/packages/hg-evolve.
+
+As all metadata about Archlinux packages is stored within the package (in
+:file:`PKGBUILD` in the source, or :file:`.PKGINFO` in the binary package), |swh| does
+not need to store them as :term:`extrinsic metadata`.
+
+Resources:
+
+* `HTTP API documentation <https://wiki.archlinux.org/title/Official_repositories_web_interface>`_
diff --git a/docs/user/software-origins/archive.rst b/docs/user/software-origins/archive.rst
index 1b75670c79283276f4d25f8eb34badbdb5295670..b0640ef0c77d4a56b954092f48f83e44d6a3a08c 100644
--- a/docs/user/software-origins/archive.rst
+++ b/docs/user/software-origins/archive.rst
@@ -7,3 +7,13 @@ Archive loader
    This page is a work in progress.
 
 .. include:: dynamic/archive_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/aur.rst b/docs/user/software-origins/aur.rst
index ecf226c6874065d080b9239f22d90598138e545b..b15ede16ad22376ff78368755b9c3a8a8fcf489e 100644
--- a/docs/user/software-origins/aur.rst
+++ b/docs/user/software-origins/aur.rst
@@ -3,7 +3,29 @@
 AUR
 ===
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/aur_status.inc
+
+This page documents how |swh| archives source packages from the
+`AUR (Archlinux User Repository) <https://aur.archlinux.org/>`_.
+The `Archlinux <https://archlinux.org/>`_ and `Archlinux ARM <https://archlinuxarm.org>`_
+distributions are
+:ref:`described in their own dedicated documentation <user-software-origins-arch>`,
+as they use a very different packaging architecture.
+
+The AUR lister will send requests to https://aur.archlinux.org/packages-meta-v1.json.gz
+to get a list of packages; then tells the AUR loader to create origins like
+https://aur.archlinux.org/hg-evolve.git using tarballs from URLs like
+https://aur.archlinux.org/cgit/aur.git/snapshot/hg-evolve.tar.gz
+
+.. note::
+
+   We should probably use https://aur.archlinux.org/packages/hg-evolve as origin URL
+   instead of https://aur.archlinux.org/hg-evolve.git
+
+As all metadata about AUR packages is stored within the :file:`PKGBUILD` file that
+serves as source, |swh| does
+not need to store them as :term:`extrinsic metadata`.
+
+Resources:
+
+* `HTTP API documentation <https://wiki.archlinux.org/title/Aurweb_RPC_interface>`_
diff --git a/docs/user/software-origins/bitbucket.rst b/docs/user/software-origins/bitbucket.rst
index fc6dbef3bffe78fb0e4dd5a7e51a611a1f07c9a0..e43d57a09c6042f84cb5e8d11a7e96705ed5b3e7 100644
--- a/docs/user/software-origins/bitbucket.rst
+++ b/docs/user/software-origins/bitbucket.rst
@@ -3,7 +3,32 @@
 Bitbucket
 =========
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/bitbucket_status.inc
+
+Bitbucket is a Git hosting platform, which used to support Mercurial.
+
+|swh|'s Bitbucket lister queries the https://api.bitbucket.org/2.0/repositories API
+endpoint anonymously.
+
+It provides an ``updated_on`` field for each repository, matching the last time
+the repository (TODO: or project? does it cover stuff like PRs and issues?) was updated;
+which is passed as ``last_update`` to the scheduler.
+
+|swh| does not have a specific loader for Bitbucket; the :ref:`Git
+<user-software-origins-git>` loader is used instead.
+Therefore, origin URLs are Bitbucket's canonical URL for the corresponding Git
+repository: :file:`https://bitbucket.org/{owner}/{name}.git`.
+
+Bitbucket does not support :ref:`Mercurial <user-software-origins-mercurial>` anymore;
+but Mercurial repositories used to be loaded with the Mercurial loader and are
+`available in the archive <https://archive.softwareheritage.org/browse/search/?q=bitbucket.org&with_visit=true&with_content=true&visit_type=hg>`__.
+Additionally, |swh| provides a `dump of raw Mercurial repositories <https://bitbucket-archive.softwareheritage.org/>`_.
+
+Bitbucket provides extrinsic metadata on repositories (owner, description,
+``created_on``, size, language, fork policy, parent repository, ...) which are currently
+not archived. Consequently, fork detection isn't used to speed up archival of git
+repositories yet.
+
+Resources:
+
+* `HTTP API documentation <https://developer.atlassian.com/cloud/bitbucket/rest/api-group-repositories/>`__
diff --git a/docs/user/software-origins/bower.rst b/docs/user/software-origins/bower.rst
index 19bcd8ac8974d3ef9f10f7d6ad1248556946aa7e..71c1e755d825837a7ced1437861bcb05d64c4ebf 100644
--- a/docs/user/software-origins/bower.rst
+++ b/docs/user/software-origins/bower.rst
@@ -3,7 +3,19 @@
 Bower
 =====
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/bower_status.inc
+
+`Bower <https://bower.io/>`_ is a package manager for the Javascript ecosystem,
+which doesn't host its own packages.
+Instead, it points to Git repositories hosted externally (eg. on GitHub).
+
+|swh| archives Bower by querying ``https://registry.bower.io/packages``, which returns
+the complete database of the registry: name and repository URL of every package
+registered on it.
+It then dispatches loading tasks to the :ref:`Git loader <user-software-origins-git>`.
+
+|swh| currently does not archive the mapping from package names to repository URLs.
+
+Resources:
+
+* `Source code of the Bower registry <https://github.com/bower/registry>`_
diff --git a/docs/user/software-origins/bzr.rst b/docs/user/software-origins/bzr.rst
index 5a66361f65f8338f5f9c283314b748e08d08b2bc..a01f3db66c654466e30db4c4b0fd6f1c274b26bf 100644
--- a/docs/user/software-origins/bzr.rst
+++ b/docs/user/software-origins/bzr.rst
@@ -3,7 +3,10 @@
 Bazaar
 ======
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/bzr_status.inc
+
+Bazaar/Breezy repositories are often discovered through listing
+:ref:`user-software-origins-launchpad` or package managers.
+
+Bazaar and Breezy repositories can also be loaded individually if they are not on any recognized
+forge, through the `Save Code Now <https://archive.softwareheritage.org/save/>`__ interface.
diff --git a/docs/user/software-origins/cgit.rst b/docs/user/software-origins/cgit.rst
index b243d8f9ec80c70e59cada8456097cd970830b39..0024ecf872f6fdc740ccf1525391967eae4e4ab3 100644
--- a/docs/user/software-origins/cgit.rst
+++ b/docs/user/software-origins/cgit.rst
@@ -3,7 +3,26 @@
 Cgit
 ====
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/cgit_status.inc
+
+`CGit <https://git.zx2c4.com/cgit/about/>`_ is a lightweight front-end for Git.
+
+|swh| archives CGit instances by scraping their HTML, starting from the index page,
+then looking for Git URLs in each project's page, embedded as ``<link rel='vcs-git'``
+HTML tags.
+Only the first HTTP(S) URL is kept; or the first URL at all, if there is no HTTP(S) URL.
+
+The CGit lister then dispatches these URLs to the :ref:`Git loader
+<user-software-origins-git>`.
+CGit projects may have their repositories hosted on arbitrary other domains (even GitHub);
+which is supported by |swh|.
+
+The "summary" page of a CGit project displays the last update of each of its branches;
+the lister uses this information to pass a ``last_update`` date to the scheduler.
+
+New CGit instances can be submitted to |swh| through the
+`Add Forge Now <https://archive.softwareheritage.org/add-forge/request/create/>`_
+interface.
+
+Project description, owner information, and mapping between CGit projects and
+repositories on third-party domains are currently not archived.
diff --git a/docs/user/software-origins/conda.rst b/docs/user/software-origins/conda.rst
index 5ac13ad6a54de75a9b7bf230842ce233a9412713..7e93bca44b86c9823a0c56be703d2e3bc6004a8b 100644
--- a/docs/user/software-origins/conda.rst
+++ b/docs/user/software-origins/conda.rst
@@ -3,7 +3,27 @@
 Conda
 =====
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/conda_status.inc
+
+`Conda <https://conda.io/>`_ is an alternative package manager for Python, used
+in particular by the `Anaconda <https://www.anaconda.com/>`_ and
+`conda-forge <https://anaconda.org/conda-forge/>`_ distributions,
+with support for other language ecosystems.
+
+|swh| currently has a lister and a loader for Conda packages, but they load
+binary packages (``.tar.gz``); and need to be modified to load source packages instead
+(``.conda``).
+
+For every configured channel (``main``, ``conda-forge``, ...) and every architecture
+(``linux-64``, ``win-64``, ...), the Conda lister downloads
+:file:`https://repo.anaconda.com/pkgs/{channel}/{arch}/repodata.json.bz2`,
+from which it extracts a list of package names. Then, from each of these package names,
+it triggers a load for the origin :file:`https://anaconda.org/{channel}/{package_name}`
+with the list of tarballs of that package.
+
+.. note::
+
+    There is a ``_anaconda_depends`` package; what do we and should we do with it?
+
+Source code from Conda is currently only archived on |swh|'s staging infrastructure.
+Metadata from Conda is currently not collected or indexed at all.
diff --git a/docs/user/software-origins/cpan.rst b/docs/user/software-origins/cpan.rst
index 6b3a172c9df16122dcbb9e31a7ce52d7ba28d301..b9a9ab092c5e615fb286133bff5a7a3c84f1798c 100644
--- a/docs/user/software-origins/cpan.rst
+++ b/docs/user/software-origins/cpan.rst
@@ -3,7 +3,20 @@
 CPAN
 ====
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/cpan_status.inc
+
+The `Comprehensive Perl Archive Network <https://www.cpan.org/>`_ is Perl's main package
+manager.
+
+CPAN packages archived by |swh| will be associated to the metacpan.org domain rather than
+cpan.org in order to point to an original web page with information about the package.
+This pattern of origin URLs is: :file:`https://metacpan.org/dist/{package_name}`,
+which references all versions of the same package.
+
+metacpan.org is also used by |swh| to list packages, thanks to its ElasticSearch API.
+
+CPAN does not seem to store any extrinsic metadata, beyond mapping between author
+username and package. Author name and email is present in intrinsic metadata and in
+release fields, anyway.
+
+Source code from CPAN is currently only archived on |swh|'s staging infrastructure.
diff --git a/docs/user/software-origins/cran.rst b/docs/user/software-origins/cran.rst
index 6bb728a739d88465aafe24635e191b8f3563cc8e..ceffb798f9bf8f511ba655628fa68a2fd8355d13 100644
--- a/docs/user/software-origins/cran.rst
+++ b/docs/user/software-origins/cran.rst
@@ -3,7 +3,19 @@
 CRAN
 ====
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/cran_status.inc
+
+The `Comprehensive R Archive Network <https://cran.r-project.org/>`_ is the package
+management system of the R language.
+
+CRAN does not expose a language-agnostic API with the information we need, so for
+simplicity/efficiency, |swh|'s CRAN lister loads the weekly dump of the CRAN database
+(in RDS format) and parses it with ``rpy2``.
+Then for each package, it creates an origin with
+:file:`https://cran.r-project.org/package={package_name}` as URL.
+
+R packages have intrinsic metadata, mostly the :file:`DESCRIPTION` file in their root
+directory, in the `deb822 <https://manpages.debian.org/bookworm/dpkg-dev/deb822.5.en.html>`_
+format.
+|swh|'s R loader parses it to extract authorship information, but this file is otherwise
+not parsed yet.
diff --git a/docs/user/software-origins/crates.rst b/docs/user/software-origins/crates.rst
index e8388e1f69425789a3199a1903a3309e8a200784..2a1111c37abf387d439e3cf4a32926067f83f750 100644
--- a/docs/user/software-origins/crates.rst
+++ b/docs/user/software-origins/crates.rst
@@ -3,7 +3,12 @@
 Crates
 ======
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/crates_status.inc
+
+`crates.io <https://crates.io/>`_ is the package manager of the `Rust programming language
+<https://www.rust-lang.org/>`_.
+
+It relies on `an index hosted on GitHub <https://github.com/rust-lang/crates.io-index>`_,
+and provides `database dumps <https://crates.io/data-access>`_, which |swh| uses to
+list packages, and create origins using this pattern:
+:file:`https://crates.io/crates/{crate}`.
diff --git a/docs/user/software-origins/cvs.rst b/docs/user/software-origins/cvs.rst
index 16f72c99c2ca3336ce5e5997141af9b784b5b79f..346ea446da9b75b143235f6a630368796691f826 100644
--- a/docs/user/software-origins/cvs.rst
+++ b/docs/user/software-origins/cvs.rst
@@ -3,7 +3,10 @@
 CVS
 ===
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/cvs_status.inc
+
+See :ref:`swh-loader-cvs` for a description of how |swh| handles loading CVS
+(aka. Concurrent Versions System) repositories.
+
+CVS repositories can be loaded individually if they are not on any recognized
+forge, through the `Save Code Now <https://archive.softwareheritage.org/save/>`__ interface.
diff --git a/docs/user/software-origins/debian.rst b/docs/user/software-origins/debian.rst
index 014876d953a48204c3ace7cac7c1238c0341ca42..e28948af1ce6b9df3758d4990a9e57fb35cc8f32 100644
--- a/docs/user/software-origins/debian.rst
+++ b/docs/user/software-origins/debian.rst
@@ -7,3 +7,13 @@ Debian
    This page is a work in progress.
 
 .. include:: dynamic/debian_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/deposit.rst b/docs/user/software-origins/deposit.rst
index 870595bcafb7f031c0e117192636e91d6fe35d71..2f8c18dd85a95e8022c67ed25be3bd31d04db2a0 100644
--- a/docs/user/software-origins/deposit.rst
+++ b/docs/user/software-origins/deposit.rst
@@ -7,3 +7,13 @@ Deposit
    This page is a work in progress.
 
 .. include:: dynamic/deposit_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/git.rst b/docs/user/software-origins/git.rst
index 815770f3daeed6e9b695841bcedaf19f589f1fcb..6ed860830e2999d9d0f5879a367a185da3e17548 100644
--- a/docs/user/software-origins/git.rst
+++ b/docs/user/software-origins/git.rst
@@ -3,7 +3,12 @@
 Git
 ===
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/git_status.inc
+
+The Git versioning system inspired the |swh| data model, and |swh| fully supports it
+on all forges (including :ref:`user-software-origins-cgit`, :ref:`user-software-origins-gitea`, :ref:`user-software-origins-github`, :ref:`user-software-origins-gitlab`,
+:ref:`user-software-origins-gogs`, and formerly `Gitorious
+<https://www.softwareheritage.org/2016/07/21/gitorious-retrieved/>`__).
+
+Git repositories can also be loaded individually if they are not on any recognized
+forge, through the `Save Code Now <https://archive.softwareheritage.org/save/>`__ interface.
diff --git a/docs/user/software-origins/gitea.rst b/docs/user/software-origins/gitea.rst
index 8f28f5794f81de368c548c6eb0f1ae122c527972..b98b6ff139358650db6540f858dbb431cadf5868 100644
--- a/docs/user/software-origins/gitea.rst
+++ b/docs/user/software-origins/gitea.rst
@@ -3,7 +3,24 @@
 Gitea
 =====
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/gitea_status.inc
+
+Gitea is a Git hosting platform forked from Gogs.
+
+|swh|'s Gitea lister queries the project API (eg. https://try.gitea.io/api/v1/repos/search
+for try.gitea.io) anonymously.
+
+It provides an ``updated_at`` field for each repository, matching the last time
+the repository (TODO: or project? does it cover stuff like PRs and issues?) was updated;
+which is passed as ``last_update`` to the scheduler.
+
+|swh| does not have a specific loader for Gitea; the :ref:`Git
+<user-software-origins-git>` loader is used instead.
+Therefore, origin URLs are Gitea's canonical URLs for the corresponding Git
+repository: :file:`https://{domain}/{owner}/{name}.git`.
+
+New Gitea instances can be submitted to |swh| through the
+`Add Forge Now <https://archive.softwareheritage.org/add-forge/request/create/>`_
+interface.
+
+|swh| also archives extrinsic project metadata (eg. project description) from Gitea.
diff --git a/docs/user/software-origins/github.rst b/docs/user/software-origins/github.rst
index e5b57f38239230f3169c611135dccc42cbf60531..65908186181cbe4ed6938991c410cc4256922468 100644
--- a/docs/user/software-origins/github.rst
+++ b/docs/user/software-origins/github.rst
@@ -7,3 +7,13 @@ GitHub
    This page is a work in progress.
 
 .. include:: dynamic/github_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/gitlab.rst b/docs/user/software-origins/gitlab.rst
index 98a988ed1d20c00fb18584fc35c403ea7cad99db..9c3e567fcba8667f43620193912660c2632a9023 100644
--- a/docs/user/software-origins/gitlab.rst
+++ b/docs/user/software-origins/gitlab.rst
@@ -1,9 +1,30 @@
 .. _user-software-origins-gitlab:
+.. _user-software-origins-heptapod:
 
 GitLab
 ======
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/gitlab_status.inc
+
+Gitlab is a Git hosting platform. Its fork Heptapod also supports Mercurial.
+
+|swh|'s Gitlab lister queries the project API (eg. https://gitlab.com/api/v4/projects
+for gitlab.com) anonymously.
+
+It provides a ``last_activity_at`` field for each repository, matching the last time
+the repository (TODO: or project? does it cover stuff like PRs and issues?) was updated;
+which is passed as ``last_update`` to the scheduler.
+
+|swh| does not have a specific loader for Gitlab/Heptapod; the :ref:`Git
+<user-software-origins-git>` and :ref:`Mercurial <user-software-origins-mercurial>`
+loaders are used instead.
+Therefore, origin URLs are Gitlab/Heptapod's canonical URLs for the corresponding Git
+or Mercurial repository: :file:`https://{domain}/{owner}/{name}.git` and
+:file:`https://{domain}/{owner}/{name}` respectively.
+
+New Gitlab/Heptapod instances can be submitted to |swh| through the
+`Add Forge Now <https://archive.softwareheritage.org/add-forge/request/create/>`_
+interface.
+
+|swh| currently does not archive extrinsic metadata from Gitlab or Heptapod due to
+`a limitation of the Gitlab API <https://gitlab.com/gitlab-org/gitlab/-/issues/361952>`__.
diff --git a/docs/user/software-origins/gnu.rst b/docs/user/software-origins/gnu.rst
index 5aadba670ab8fae94bf0de30c9debf5458433302..b6d5fc0e5e4d675802e7d3e741acb1de1c942c5d 100644
--- a/docs/user/software-origins/gnu.rst
+++ b/docs/user/software-origins/gnu.rst
@@ -3,7 +3,11 @@
 GNU projects
 ============
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/gnu_status.inc
+
+|swh| archives all software available on https://ftp.gnu.org. It does so by listing
+projects from https://ftp.gnu.org/tree.json.gz and passing them to the
+:ref:`Archive loader <user-software-origins-archive>`.
+
+This API provides a ``time`` field for each file, matching the time the file
+was uploaded or updated; which is passed as ``last_update`` to the scheduler.
diff --git a/docs/user/software-origins/gogs.rst b/docs/user/software-origins/gogs.rst
index cadfb9891a010ec0dec64b324cf8847b7aa9d88a..e27139b28656eaade4a6de2aa14e93e13011d8d6 100644
--- a/docs/user/software-origins/gogs.rst
+++ b/docs/user/software-origins/gogs.rst
@@ -3,7 +3,25 @@
 Gogs
 ====
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/gogs_status.inc
+
+Gogs (Go Git Service) is a Git hosting platform.
+
+|swh|'s Gogs lister queries the project API (eg. https://try.gogs.io/api/v1/repos/search
+for try.gogs.io), usually with an authentication token as Gogs does not allow anonymous
+access.
+
+It provides an ``updated_at`` field for each repository, matching the last time
+the repository (TODO: or project? does it cover stuff like PRs and issues?) was updated;
+which is passed as ``last_update`` to the scheduler.
+
+|swh| does not have a specific loader for Gogs; the :ref:`Git
+<user-software-origins-git>` loader is used instead.
+Therefore, origin URLs are Gogs's canonical URLs for the corresponding Git
+repository: :file:`https://{domain}/{owner}/{name}.git`.
+
+New Gogs instances can be submitted to |swh| through the
+`Add Forge Now <https://archive.softwareheritage.org/add-forge/request/create/>`_
+interface.
+
+|swh| does not yet archive extrinsic project metadata (eg. project description) from Gogs.
diff --git a/docs/user/software-origins/golang.rst b/docs/user/software-origins/golang.rst
index 65154b638784cb9c6bab2cb9da859b573f2902a8..aac69b3846f3886e2e6cc43de7014fd1555295e2 100644
--- a/docs/user/software-origins/golang.rst
+++ b/docs/user/software-origins/golang.rst
@@ -3,7 +3,29 @@
 Golang
 ======
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/golang_status.inc
+
+The `Go programming language <https://go.dev/>`_ identifies modules using URL-like
+strings, called the "module path".
+Module paths start with a domain and path to a VCS repository (usually Git) and
+optionally path of a directory within that repository. See the
+`Go Modules Reference <https://go.dev/ref/mod>`_ for details.
+
+|swh| follows the convention of the Golang ecosystem of proxying through the
+proxy.golang.org rather than accessing these repositories directly in order to be
+as close as possible to the Go build system.
+
+Go origin URLs in |swh| are module paths prefixed with ``https://pkg.go.dev/``.
+For example, the origin URL for module ``github.com/gofiber/fiber`` is
+``https://pkg.go.dev/github.com/gofiber/fiber`` (`see it in the archive <https://archive.softwareheritage.org/browse/origin/directory/?origin_url=https://pkg.go.dev/github.com/gofiber/fiber>`__)
+
+In the Golang ecosystem, it is customary to handle breaking changes in a module by
+publishing the new module version at a different path; for example
+``github.com/gofiber/fiber/v2``.
+See `Module version numbering <https://go.dev/doc/modules/version-numbers>`_ for details.
+|swh| follows this convention, and uses different origin URLs for new major versions,
+such as ``https://pkg.go.dev/github.com/gofiber/fiber/v2`` (`see it in the archive <https://archive.softwareheritage.org/browse/origin/directory/?origin_url=https://pkg.go.dev/github.com/gofiber/fiber/v2>`__)
+
+On the technical side, |swh| fetches the list of known Go modules from
+https://index.golang.org/index, and relies on the given timestamps to detect updates
+to packages archived in the past.
diff --git a/docs/user/software-origins/hackage.rst b/docs/user/software-origins/hackage.rst
index c59f46e52f9f73c5052f31f90e623ac0170a4ef2..002b18adb2ce01a79259362d20f58278e69980da 100644
--- a/docs/user/software-origins/hackage.rst
+++ b/docs/user/software-origins/hackage.rst
@@ -3,7 +3,22 @@
 Hackage
 =======
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/hackage_status.inc
+
+`Hackage <https://hackage.haskell.org/>`_ is the central package repository of the
+Haskell ecosystem.
+
+|swh| archives Hackage by querying ``https://hackage.haskell.org/packages/search``, which
+returns the list of packages updated since a given date.
+It then dispatches loading tasks to a dedicated loader, which downloads a list of revisions
+from :file:`https://hackage.haskell.org/package/{pkgname}-{version}/revisions/` and packages
+and the package tarballs themselves from
+:file:`https://hackage.haskell.org/package/{pkgname}-{version}/{pkgname}-{version}.tar.gz`.
+
+Metadata from Hackage is archived as part of each package (in ``.cabal`` files).
+
+Resources:
+
+* `Source code of Hackage <https://github.com/haskell/hackage-server>`_
+
+Source code from Hackage is currently only archived on |swh|'s staging infrastructure.
diff --git a/docs/user/software-origins/launchpad.rst b/docs/user/software-origins/launchpad.rst
index ee285506f5f9b17ed8f2e3a799503b4cdacb7424..9accda478ebb6979053da195deb1c6534697d0d4 100644
--- a/docs/user/software-origins/launchpad.rst
+++ b/docs/user/software-origins/launchpad.rst
@@ -3,7 +3,16 @@
 Launchpad
 =========
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/launchpad_status.inc
+
+`Launchpad <https://launchpad.net/>`_ is a Bazaar and Git hosting platform.
+
+It provides a ``bzr_date_last_modified``/``git_date_last_modified`` field for each
+repository, matching the last time the repository was updated,
+which is passed as ``last_update`` to the scheduler.
+
+|swh| does not have a specific loader for Launchpad; the :ref:`BZR
+<user-software-origins-bzr>` and :ref:`Git <user-software-origins-git>` loaders are used
+instead.
+Therefore, origin URLs are the Launchpad canonical URLs of the corresponding Bazaar or Git
+repositories.
diff --git a/docs/user/software-origins/maven.rst b/docs/user/software-origins/maven.rst
index bd163be4c4e573f87caf09d16456ebad81e11a8e..c283fab54e21dc752402e07b96ec2d1f75bfb384 100644
--- a/docs/user/software-origins/maven.rst
+++ b/docs/user/software-origins/maven.rst
@@ -3,7 +3,14 @@
 Maven
 =====
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/maven_status.inc
+
+`Maven <https://maven.apache.org/>`_ is Java's main package manager. There are multiple
+Maven repositories, each of which stores both binary packages (JAR files containing Java
+classes) and source code (as source JARs). |swh| archives the latter.
+
+Additionally, |swh| archives each package's :file:`pom.xml` as :term:`extrinsic metadata`
+and mines them for links to external version control systems to archive.
+
+See the `Maven lister's documentation <https://gitlab.softwareheritage.org/swh/devel/swh-lister/-/blob/master/swh/lister/maven/README.md>`_
+for details on its implementation.
diff --git a/docs/user/software-origins/mercurial.rst b/docs/user/software-origins/mercurial.rst
index e7563c07368ce03a299b5d90c2d87f42e484fb31..4c57c696a468697fe611221510850a0ecd075e7b 100644
--- a/docs/user/software-origins/mercurial.rst
+++ b/docs/user/software-origins/mercurial.rst
@@ -3,7 +3,14 @@
 Mercurial
 =========
 
-.. todo::
-   This page is a work in progress.
-
 .. include:: dynamic/mercurial_status.inc
+
+Mercurial repositories are often discovered through listing package managers
+or forges like :ref:`Heptapod <user-software-origins-gitlab>` or formerly
+:ref:`Bitbucket <user-software-origins-bitbucket>`.
+
+Mercurial repositories can also be loaded individually if they are not on any recognized
+forge, through the `Save Code Now <https://archive.softwareheritage.org/save/>`__ interface.
+
+|swh| supports loading Mercurial repositories, but currently lacks support for some advanced
+history manipulation features of Mercurial.
diff --git a/docs/user/software-origins/nixguix.rst b/docs/user/software-origins/nixguix.rst
index dd042fa6865054743bb43422eeffb9ff9d2385db..6acfc6ffc05ab6c51385c1c1fe96f7c9d9596bcd 100644
--- a/docs/user/software-origins/nixguix.rst
+++ b/docs/user/software-origins/nixguix.rst
@@ -7,3 +7,13 @@ Nix and Guix
    This page is a work in progress.
 
 .. include:: dynamic/nixguix_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/npm.rst b/docs/user/software-origins/npm.rst
index 49f4a8f1f6a77e43e72e24ba95e214cbfef49316..3009aa1dee55f55bba26ba6a230234dc67e90502 100644
--- a/docs/user/software-origins/npm.rst
+++ b/docs/user/software-origins/npm.rst
@@ -7,3 +7,13 @@ NPM
    This page is a work in progress.
 
 .. include:: dynamic/npm_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/opam.rst b/docs/user/software-origins/opam.rst
index 88a5834b30a908e7d449eadece8a32a0f50cb346..3516300e0ac2537bb405b7fed815f9e673550878 100644
--- a/docs/user/software-origins/opam.rst
+++ b/docs/user/software-origins/opam.rst
@@ -7,3 +7,13 @@ Opam
    This page is a work in progress.
 
 .. include:: dynamic/opam_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/packagist.rst b/docs/user/software-origins/packagist.rst
index f51ece2587a92cc14a8dcb0b3b6939c0902f63e1..aa7d94960d55fab947b5ecef384e4d32f0edcd76 100644
--- a/docs/user/software-origins/packagist.rst
+++ b/docs/user/software-origins/packagist.rst
@@ -7,3 +7,13 @@ Packagist
    This page is a work in progress.
 
 .. include:: dynamic/packagist_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/phabricator.rst b/docs/user/software-origins/phabricator.rst
index f86258a62c20b0564650d7f0e87a00d96a450679..66f79a6b171e412a8e6cb44d425967996ec0f199 100644
--- a/docs/user/software-origins/phabricator.rst
+++ b/docs/user/software-origins/phabricator.rst
@@ -7,3 +7,13 @@ Phabricator
    This page is a work in progress.
 
 .. include:: dynamic/phabricator_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/pubdev.rst b/docs/user/software-origins/pubdev.rst
index 1fc7bf757fca24bb837e1b00f3a924529696f8e4..7288a0b4d5fa6d386841bc1bdc18a00d902e93f1 100644
--- a/docs/user/software-origins/pubdev.rst
+++ b/docs/user/software-origins/pubdev.rst
@@ -7,3 +7,13 @@ Pub.Dev
    This page is a work in progress.
 
 .. include:: dynamic/pubdev_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/puppet.rst b/docs/user/software-origins/puppet.rst
index e77efacfbdbfc262e44dfc58b1827c9492785d21..4f0481cdcfe5f3811aac875d5d0ff81b601d5c4b 100644
--- a/docs/user/software-origins/puppet.rst
+++ b/docs/user/software-origins/puppet.rst
@@ -7,3 +7,13 @@ Puppet
    This page is a work in progress.
 
 .. include:: dynamic/puppet_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/pypi.rst b/docs/user/software-origins/pypi.rst
index 8fe948b0132dccee9776f79f88232a4c7ef6a1e4..ecb22274880bf17ba994f56a49c4eff82a834674 100644
--- a/docs/user/software-origins/pypi.rst
+++ b/docs/user/software-origins/pypi.rst
@@ -7,3 +7,13 @@ PyPI
    This page is a work in progress.
 
 .. include:: dynamic/pypi_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/rubygems.rst b/docs/user/software-origins/rubygems.rst
index 37ff1dda049f21ffec06d667ca6ef9ffc8729a5f..61605053781f2b95fcf65e82d5f72b3da0468085 100644
--- a/docs/user/software-origins/rubygems.rst
+++ b/docs/user/software-origins/rubygems.rst
@@ -7,3 +7,13 @@ RubyGems
    This page is a work in progress.
 
 .. include:: dynamic/rubygems_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/sourceforge.rst b/docs/user/software-origins/sourceforge.rst
index 533e06f5a959ab4003f98cb3054b19e6e62af8e7..934afb42a2e2cd961a4baea81f5ece70d419fc0c 100644
--- a/docs/user/software-origins/sourceforge.rst
+++ b/docs/user/software-origins/sourceforge.rst
@@ -7,3 +7,13 @@ SourceForge
    This page is a work in progress.
 
 .. include:: dynamic/sourceforge_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/svn.rst b/docs/user/software-origins/svn.rst
index c36ebeac0a42036af615b4673f6e71d4938120c5..0013055655f1836c5cb53c1b72a1f13ce0bc2f92 100644
--- a/docs/user/software-origins/svn.rst
+++ b/docs/user/software-origins/svn.rst
@@ -7,3 +7,13 @@ Subversion
    This page is a work in progress.
 
 .. include:: dynamic/svn_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?
diff --git a/docs/user/software-origins/tuleap.rst b/docs/user/software-origins/tuleap.rst
index d718a5a9be5f5a176ec3b0152e7f000d34197214..417a37a8251ca5c48ae8fa9fc51a548cfc03d6a9 100644
--- a/docs/user/software-origins/tuleap.rst
+++ b/docs/user/software-origins/tuleap.rst
@@ -7,3 +7,13 @@ Tuleap
    This page is a work in progress.
 
 .. include:: dynamic/tuleap_status.inc
+
+TODO:
+
+* description of the software origin
+* summary of the lister's algorithm
+* summary of the loader's algorithm
+* URL pattern
+* collect extrinsic metadata?
+* index extrinsic metadata?
+* index intrinsic metadata?