health.rs 2.42 KB
Newer Older
1
2
3
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

4
5
use super::{RouteDoc, service_v2};
use axum::{Json, Router, http::Method, http::StatusCode, response::IntoResponse, routing::get};
6
use dynamo_runtime::instances::list_all_instances;
7
8
9
10
11
12
13
use serde_json::json;
use std::sync::Arc;

pub fn health_check_router(
    state: Arc<service_v2::State>,
    path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
14
    let health_path = path.unwrap_or_else(|| "/health".to_string());
15

16
    let docs: Vec<RouteDoc> = vec![RouteDoc::new(Method::GET, &health_path)];
17
18

    let router = Router::new()
19
        .route(&health_path, get(health_handler))
20
21
22
23
24
        .with_state(state);

    (docs, router)
}

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/// Builds the router that serves the liveness endpoint.
///
/// The endpoint is mounted on `path` when one is supplied and on `/live`
/// otherwise. The returned tuple holds the documentation entry for that
/// `GET` route and the router wired to `live_handler` with `state` attached.
pub fn live_check_router(
    state: Arc<service_v2::State>,
    path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
    // Fall back to the default liveness route when no override is given.
    let route_path = path.unwrap_or_else(|| String::from("/live"));

    let route_docs: Vec<RouteDoc> = vec![RouteDoc::new(Method::GET, &route_path)];
    let live_router = Router::new()
        .route(&route_path, get(live_handler))
        .with_state(state);

    (route_docs, live_router)
}

async fn live_handler(
41
    axum::extract::State(state): axum::extract::State<Arc<service_v2::State>>,
42
) -> impl IntoResponse {
43
44
45
46
47
48
49
50
51
52
53
    // Check if the http service is being cancelled/shutdown
    if state.is_cancelled() {
        return (
            StatusCode::SERVICE_UNAVAILABLE,
            Json(json!({
                "status": "shutting_down",
                "message": "Service is shutting down"
            })),
        );
    }

54
55
56
57
58
59
60
61
62
    (
        StatusCode::OK,
        Json(json!({
            "status": "live",
            "message": "Service is live"
        })),
    )
}

63
64
65
async fn health_handler(
    axum::extract::State(state): axum::extract::State<Arc<service_v2::State>>,
) -> impl IntoResponse {
66
    let instances = match list_all_instances(state.discovery()).await {
67
68
        Ok(instances) => instances,
        Err(err) => {
69
            tracing::warn!(%err, "Failed to fetch instances from discovery");
70
            vec![]
71
72
        }
    };
73
    let mut endpoints: Vec<String> = instances
74
        .iter()
75
        .map(|instance| instance.endpoint_id().as_url())
76
        .collect();
77
78
    endpoints.sort();
    endpoints.dedup();
79
80
81
82
83
84
85
86
    (
        StatusCode::OK,
        Json(json!({
            "status": "healthy",
            "endpoints": endpoints,
            "instances": instances
        })),
    )
87
}