From ddef7c0475cc1a30cd980fb257a3010671ff0753 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 12 Sep 2024 14:24:44 +0200
Subject: [PATCH 1/2] Add support for the gRPC Health Checking Protocol

---
 Cargo.lock             | 14 ++++++++++++++
 docs/grpc-api.rst      | 22 +++++++++++++++++++++-
 grpc-server/Cargo.toml |  1 +
 grpc-server/src/lib.rs | 15 ++++++++++++---
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index e90481cc8..f342e57b6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3945,6 +3945,7 @@ dependencies = [
  "tokio-stream",
  "tonic",
  "tonic-build",
+ "tonic-health",
  "tonic-middleware",
  "tonic-reflection",
 ]
@@ -4347,6 +4348,19 @@ dependencies = [
  "syn 2.0.47",
 ]
 
+[[package]]
+name = "tonic-health"
+version = "0.12.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0a34e6f706bae26b2b490e1da5c3f6a6ff87cae442bcbc7c881bab9631b5a7"
+dependencies = [
+ "async-stream",
+ "prost 0.13.2",
+ "tokio",
+ "tokio-stream",
+ "tonic",
+]
+
 [[package]]
 name = "tonic-middleware"
 version = "0.2.1"
diff --git a/docs/grpc-api.rst b/docs/grpc-api.rst
index 574a0071a..1eab50464 100644
--- a/docs/grpc-api.rst
+++ b/docs/grpc-api.rst
@@ -881,11 +881,31 @@ restrictions.
 Because ``midpoint_index = 5``, the common ancestor is
 ``swh:1:rev:0000000000000000000000000000000000000018``.
 
+.. _swh-graph-grpc-monitoring:
+
+Monitoring
+==========
+
+.. _swh-graph-grpc-monitor-health:
+
+Healthiness
+-----------
+
+This service implements the `gRPC Health Checking Protocol<https://github.com/grpc/grpc/blob/master/doc/health-checking.md>`_:
+
+.. code-block:: console
+
+    $ ~/grpc/cmake/build/grpc_cli call localhost:50091 grpc.health.v1.Health.Check "service: 'swh.graph.TraversalService'"
+    status: SERVING
+
+The server always reports itself as healthy while it is running, because OOM
+errors and failures to read from disk crash the process -- however, it will
+not answer health checks while all workers are busy.
 
 .. _swh-graph-grpc-statsd-metrics:
 
 StatsD metrics
-==============
+--------------
 
 The gRPC server sends `Statsd <https://www.datadoghq.com/blog/statsd/>`_ metrics
 to ``localhost:8125`` (overridable with :envvar:`STATSD_HOST` and :envvar:`STATSD_PORT`
diff --git a/grpc-server/Cargo.toml b/grpc-server/Cargo.toml
index 7997f721f..295052d6b 100644
--- a/grpc-server/Cargo.toml
+++ b/grpc-server/Cargo.toml
@@ -25,6 +25,7 @@ log.workspace = true
 prost = "0.13"
 prost-types = "0.13"
 tonic = "0.12.2"
+tonic-health = "0.12.2"
 tonic-reflection = "0.12.2"
 tonic-middleware = "0.2.1"
 http-body = "1"  # must match the version used by tonic
diff --git a/grpc-server/src/lib.rs b/grpc-server/src/lib.rs
index ab8910b58..49d931919 100644
--- a/grpc-server/src/lib.rs
+++ b/grpc-server/src/lib.rs
@@ -31,6 +31,8 @@ pub mod proto {
         tonic::include_file_descriptor_set!("swhgraph_descriptor");
 }
 
+use proto::traversal_service_server::TraversalServiceServer;
+
 mod filters;
 mod find_path;
 mod node_builder;
@@ -290,22 +292,29 @@ pub async fn serve<G: SwhFullGraph + Sync + Send + 'static>(
     statsd_client: cadence::StatsdClient,
 ) -> Result<(), tonic::transport::Error> {
     let graph = Arc::new(graph);
+
+    let (mut health_reporter, health_service) = tonic_health::server::health_reporter();
+    health_reporter
+        .set_serving::<TraversalServiceServer<TraversalService<Arc<G>>>>()
+        .await;
+
     Server::builder()
         .add_service(MiddlewareFor::new(
-            proto::traversal_service_server::TraversalServiceServer::new(TraversalService::new(
-                graph,
-            )),
+            TraversalServiceServer::new(TraversalService::new(graph)),
             MetricsMiddleware::new(statsd_client),
         ))
+        .add_service(health_service)
         .add_service(
             tonic_reflection::server::Builder::configure()
                 .register_encoded_file_descriptor_set(proto::FILE_DESCRIPTOR_SET)
+                .register_encoded_file_descriptor_set(tonic_health::pb::FILE_DESCRIPTOR_SET)
                 .build_v1()
                 .expect("Could not load v1 reflection service"),
         )
         .add_service(
             tonic_reflection::server::Builder::configure()
                 .register_encoded_file_descriptor_set(proto::FILE_DESCRIPTOR_SET)
+                .register_encoded_file_descriptor_set(tonic_health::pb::FILE_DESCRIPTOR_SET)
                 .build_v1alpha()
                 .expect("Could not load v1alpha reflection service"),
         )
-- 
GitLab


From abe2922641d69c778dbf8d318f62133f1afd5587 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 12 Sep 2024 16:15:41 +0200
Subject: [PATCH 2/2] docs: Fix typo

---
 docs/grpc-api.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/grpc-api.rst b/docs/grpc-api.rst
index 1eab50464..caf604d77 100644
--- a/docs/grpc-api.rst
+++ b/docs/grpc-api.rst
@@ -891,7 +891,7 @@ Monitoring
 Healthiness
 -----------
 
-This service implements the `gRPC Health Checking Protocol<https://github.com/grpc/grpc/blob/master/doc/health-checking.md>`_:
+This service implements the `gRPC Health Checking Protocol <https://github.com/grpc/grpc/blob/master/doc/health-checking.md>`_:
 
 .. code-block:: console
 
-- 
GitLab