leader.rs 3.53 KB
Newer Older
Ryan Olson's avatar
Ryan Olson committed
1
2
3
4
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use super::*;
Richard Huo's avatar
Richard Huo committed
5

Ryan Olson's avatar
Ryan Olson committed
6
use derive_getters::Dissolve;
7
8
9
use llm_rs::block_manager::distributed::{
    KvbmLeader as KvbmLeaderImpl, KvbmLeaderConfig, KvbmLeaderNumBlocksConfig,
};
10
use utils::{get_leader_zmq_ack_url, get_leader_zmq_pub_url};
Ryan Olson's avatar
Ryan Olson committed
11

12
13
14
use dynamo_runtime::config::environment_names::kvbm::cpu_cache as env_cpu_cache;
use dynamo_runtime::config::environment_names::kvbm::disk_cache as env_disk_cache;
use dynamo_runtime::config::environment_names::kvbm::leader as env_kvbm_leader;
Ryan Olson's avatar
Ryan Olson committed
15

16
/// Fallback leader init timeout, in seconds (1800 s = 30 minutes), used when
/// the timeout environment variable is unset or does not parse as a `u64`.
const DEFAULT_INIT_TIMEOUT_SECS: u64 = 1800;
Ryan Olson's avatar
Ryan Olson committed
17

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/// Reads the environment variable `key` and parses it as a `usize`.
///
/// Surrounding whitespace is ignored. Returns `None` when the variable is
/// unset, is not valid Unicode, or does not parse as a `usize`.
fn read_env_usize(key: &str) -> Option<usize> {
    match std::env::var(key) {
        Ok(raw) => raw.trim().parse().ok(),
        Err(_) => None,
    }
}

/// Reads the environment variable `key` and parses it as an `f64`.
///
/// Surrounding whitespace is ignored, matching `read_env_usize`, so a padded
/// value such as `" 4 "` is honoured instead of silently becoming `0.0`.
///
/// Returns `0.0` when the variable is unset, is not valid Unicode, or does
/// not parse as an `f64`.
fn read_cache_size_float(key: &str) -> f64 {
    std::env::var(key)
        .ok()
        // `f64::from_str` rejects leading/trailing whitespace, so trim first.
        .and_then(|raw| raw.trim().parse::<f64>().ok())
        .unwrap_or(0.0)
}

/// Builds a `KvbmLeaderNumBlocksConfig` from two environment variables.
///
/// * `cache_size_key` - variable holding the cache size in GB; read via
///   `read_cache_size_float`.
/// * `override_key` - variable holding an explicit block count; read via
///   `read_env_usize`.
///
/// The cache size is always recorded. `num_blocks_overriden` carries the
/// override when one is present and parsable, and `0` otherwise.
fn get_blocks_config(cache_size_key: &str, override_key: &str) -> KvbmLeaderNumBlocksConfig {
    // Both outcomes produce the same struct shape; only the override value
    // differs, so a missing override is folded into 0 up front.
    let num_blocks_overriden = read_env_usize(override_key).unwrap_or(0);
    let cache_size_in_gb = read_cache_size_float(cache_size_key);

    KvbmLeaderNumBlocksConfig {
        cache_size_in_gb,
        num_blocks_overriden,
    }
}

/// Returns the leader init timeout in seconds.
///
/// Reads the environment variable `override_key` and parses it as a `u64`.
/// Surrounding whitespace is ignored, matching `read_env_usize`, so a padded
/// value is honoured rather than silently replaced by the default.
///
/// Falls back to `DEFAULT_INIT_TIMEOUT_SECS` when the variable is unset, is
/// not valid Unicode, or does not parse as a `u64`.
fn get_leader_init_timeout_secs(override_key: &str) -> u64 {
    std::env::var(override_key)
        .ok()
        // Integer parsing rejects surrounding whitespace, so trim first.
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .unwrap_or(DEFAULT_INIT_TIMEOUT_SECS)
}

#[pyclass]
#[derive(Clone, Dissolve)]
pub struct KvbmLeader {
    leader: Arc<KvbmLeaderImpl>,
Richard Huo's avatar
Richard Huo committed
58
    drt: Option<Arc<rs::DistributedRuntime>>,
Ryan Olson's avatar
Ryan Olson committed
59
60
61
62
63
64
65
66
67
68
69
}

impl KvbmLeader {
    /// Returns another shared handle to the wrapped leader implementation.
    ///
    /// Only the reference count is bumped; the leader itself is not copied.
    pub fn get_inner(&self) -> Arc<KvbmLeaderImpl> {
        Arc::clone(&self.leader)
    }
}

#[pymethods]
impl KvbmLeader {
    #[new]
Richard Huo's avatar
Richard Huo committed
70
71
72
73
74
75
76
77
78
79
    #[pyo3(signature = (world_size, drt=None))]
    fn new(world_size: usize, drt: Option<PyObject>) -> PyResult<Self> {
        let drt: Option<Arc<rs::DistributedRuntime>> = Python::with_gil(|py| {
            if let Some(obj) = drt {
                extract_distributed_runtime_from_obj(py, obj)
            } else {
                Ok(None)
            }
        })?;

Ryan Olson's avatar
Ryan Olson committed
80
        let leader_init_timeout_sec: u64 =
81
            get_leader_init_timeout_secs(env_kvbm_leader::DYN_KVBM_LEADER_WORKER_INIT_TIMEOUT_SECS);
Ryan Olson's avatar
Ryan Olson committed
82
83
84
85

        let config = KvbmLeaderConfig::builder()
            .world_size(world_size)
            .leader_init_timeout_secs(leader_init_timeout_sec)
86
87
88
89
90
91
92
93
            .host_blocks_config(get_blocks_config(
                env_cpu_cache::DYN_KVBM_CPU_CACHE_GB,
                env_cpu_cache::DYN_KVBM_CPU_CACHE_OVERRIDE_NUM_BLOCKS,
            ))
            .disk_blocks_config(get_blocks_config(
                env_disk_cache::DYN_KVBM_DISK_CACHE_GB,
                env_disk_cache::DYN_KVBM_DISK_CACHE_OVERRIDE_NUM_BLOCKS,
            ))
94
95
            .leader_pub_url(get_leader_zmq_pub_url())
            .leader_ack_url(get_leader_zmq_ack_url())
Ryan Olson's avatar
Ryan Olson committed
96
97
98
            .build()
            .map_err(to_pyerr)?;

99
100
        config.sanity_check().map_err(to_pyerr)?;

Richard Huo's avatar
Richard Huo committed
101
        let rt = get_current_tokio_handle();
Ryan Olson's avatar
Ryan Olson committed
102
103
104
105
106
107
108
109
110
111

        let leader =
            rt.block_on(async move { KvbmLeaderImpl::new(config).await.map_err(to_pyerr) })?;

        Ok(Self {
            leader: Arc::new(leader),
            drt,
        })
    }
}