diff --git a/Cargo.lock b/Cargo.lock index e90481cc88d0857dcfe8606d651918a161af8c17..f342e57b68db9565dd7e404c5a98f44d51f31e5f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3945,6 +3945,7 @@ dependencies = [ "tokio-stream", "tonic", "tonic-build", + "tonic-health", "tonic-middleware", "tonic-reflection", ] @@ -4347,6 +4348,19 @@ dependencies = [ "syn 2.0.47", ] +[[package]] +name = "tonic-health" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0a34e6f706bae26b2b490e1da5c3f6a6ff87cae442bcbc7c881bab9631b5a7" +dependencies = [ + "async-stream", + "prost 0.13.2", + "tokio", + "tokio-stream", + "tonic", +] + [[package]] name = "tonic-middleware" version = "0.2.1" diff --git a/docs/grpc-api.rst b/docs/grpc-api.rst index 574a0071a0dd3820660a657b8fb8bf73c9b6d3cd..caf604d7786141e2d524f24f2e9a7995e573a705 100644 --- a/docs/grpc-api.rst +++ b/docs/grpc-api.rst @@ -881,11 +881,31 @@ restrictions. Because ``midpoint_index = 5``, the common ancestor is ``swh:1:rev:0000000000000000000000000000000000000018``. +.. _swh-graph-grpc-monitoring: + +Monitoring +========== + +.. _swh-graph-grpc-monitor-health: + +Healthiness +----------- + +This service implements the `gRPC Health Checking Protocol <https://github.com/grpc/grpc/blob/master/doc/health-checking.md>`_: + +.. code-block:: console + + $ ~/grpc/cmake/build/grpc_cli call localhost:50091 grpc.health.v1.Health.Check "service: 'swh.graph.TraversalService'" + status: SERVING + +It is always considered healthy while running, as OOM errors and failures +to read from disk cause a crash -- though it will not answer while all +workers are busy. .. _swh-graph-grpc-statsd-metrics: StatsD metrics -============== +-------------- The gRPC server sends `Statsd <https://www.datadoghq.com/blog/statsd/>`_ metrics to ``localhost:8125`` (overridable with :envvar:`STATSD_HOST` and :envvar:`STATSD_PORT` diff --git a/grpc-server/Cargo.toml b/grpc-server/Cargo.toml index 7997f721fa088c2529f9508f4d4241ddefa0652d..295052d6bd31e56b8faf0f603719896b20c7df3d 100644 --- a/grpc-server/Cargo.toml +++ b/grpc-server/Cargo.toml @@ -25,6 +25,7 @@ log.workspace = true prost = "0.13" prost-types = "0.13" tonic = "0.12.2" +tonic-health = "0.12.2" tonic-reflection = "0.12.2" tonic-middleware = "0.2.1" http-body = "1" # must match the version used by tonic diff --git a/grpc-server/src/lib.rs b/grpc-server/src/lib.rs index ab8910b58f31f8e928e71678b46eb11f92f1e7a1..49d931919bb71cf92aaa3920dace4baeee206877 100644 --- a/grpc-server/src/lib.rs +++ b/grpc-server/src/lib.rs @@ -31,6 +31,8 @@ pub mod proto { tonic::include_file_descriptor_set!("swhgraph_descriptor"); } +use proto::traversal_service_server::TraversalServiceServer; + mod filters; mod find_path; mod node_builder; @@ -290,22 +292,29 @@ pub async fn serve<G: SwhFullGraph + Sync + Send + 'static>( statsd_client: cadence::StatsdClient, ) -> Result<(), tonic::transport::Error> { let graph = Arc::new(graph); + + let (mut health_reporter, health_service) = tonic_health::server::health_reporter(); + health_reporter + .set_serving::<TraversalServiceServer<TraversalService<Arc<G>>>>() + .await; + Server::builder() .add_service(MiddlewareFor::new( - proto::traversal_service_server::TraversalServiceServer::new(TraversalService::new( - graph, - )), + TraversalServiceServer::new(TraversalService::new(graph)), MetricsMiddleware::new(statsd_client), )) + .add_service(health_service) .add_service( tonic_reflection::server::Builder::configure() .register_encoded_file_descriptor_set(proto::FILE_DESCRIPTOR_SET) + .register_encoded_file_descriptor_set(tonic_health::pb::FILE_DESCRIPTOR_SET) .build_v1() .expect("Could not load v1 reflection service"), ) .add_service( tonic_reflection::server::Builder::configure() .register_encoded_file_descriptor_set(proto::FILE_DESCRIPTOR_SET) + .register_encoded_file_descriptor_set(tonic_health::pb::FILE_DESCRIPTOR_SET) .build_v1alpha() .expect("Could not load v1alpha reflection service"), )