// metrics_kvbm.rs
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use dynamo_runtime::metrics::MetricsRegistry;
use prometheus::IntCounter;

#[derive(Clone, Debug)]
pub struct KvbmMetrics {
9
    // number of offload requests
10
    pub offload_requests: IntCounter,
11
12
13
14
15
16
17
18
19
20
21
22
23
24

    // number of blocks offloaded from device to host
    pub offload_blocks_d2h: IntCounter,

    // number of onboard requests
    pub onboard_requests: IntCounter,

    // number of blocks onboarded from host to device
    pub onboard_blocks_h2d: IntCounter,

    // number of blocks onboarded from disk to device
    pub onboard_blocks_d2d: IntCounter,

    // number of save kv layer requests
25
    pub save_kv_layer_requests: IntCounter,
26
27
28

    // number of matched tokens from KVBM
    pub matched_tokens: IntCounter,
29
30
31
32
33
34
35
}

impl KvbmMetrics {
    pub fn new(mr: &dyn MetricsRegistry) -> Self {
        let offload_requests = mr
            .create_intcounter("offload_requests", "The number of offload requests", &[])
            .unwrap();
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
        let offload_blocks_d2h = mr
            .create_intcounter(
                "offload_blocks_d2h",
                "The number of offload blocks from device to host",
                &[],
            )
            .unwrap();
        let onboard_requests = mr
            .create_intcounter("onboard_requests", "The number of onboard requests", &[])
            .unwrap();
        let onboard_blocks_h2d = mr
            .create_intcounter(
                "onboard_blocks_h2d",
                "The number of onboard blocks from host to device",
                &[],
            )
            .unwrap();
        let onboard_blocks_d2d = mr
            .create_intcounter(
                "onboard_blocks_d2d",
                "The number of onboard blocks from disk to device",
                &[],
            )
            .unwrap();
60
61
62
63
64
65
66
        let save_kv_layer_requests = mr
            .create_intcounter(
                "save_kv_layer_requests",
                "The number of save kv layer requests",
                &[],
            )
            .unwrap();
67
68
69
        let matched_tokens = mr
            .create_intcounter("matched_tokens", "The number of matched tokens", &[])
            .unwrap();
70
71
        Self {
            offload_requests,
72
73
74
75
            offload_blocks_d2h,
            onboard_requests,
            onboard_blocks_h2d,
            onboard_blocks_d2d,
76
            save_kv_layer_requests,
77
            matched_tokens,
78
79
80
        }
    }
}