// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! The entrypoint module provides tools to build a Dynamo runner.
//! - Create an EngineConfig of the engine (potentially auto-discovered) to execute
//! - Connect it to an Input

pub mod input;
9
pub use input::{build_routed_pipeline, build_routed_pipeline_with_preprocessor};
10
11
12
13
14
15
16
17
18
19
20
21
22
23

use std::sync::Arc;

use dynamo_runtime::pipeline::RouterMode;

use crate::{
    backend::ExecutionContext, engines::StreamingEngine, kv_router::KvRouterConfig,
    local_model::LocalModel,
};

/// Routing settings for a runner.
///
/// Build one with [`RouterConfig::new`] or [`Default::default`]; both leave
/// `busy_threshold` as `None` and `enforce_disagg` as `false`.
#[derive(Debug, Clone, Default)]
pub struct RouterConfig {
    /// Routing mode to use.
    pub router_mode: RouterMode,
    /// Settings for the KV router.
    pub kv_router_config: KvRouterConfig,
    /// Optional busy threshold; `None` means no threshold is configured.
    ///
    /// NOTE(review): presumably the load level above which a worker is treated
    /// as busy — confirm units and range against the router implementation.
    pub busy_threshold: Option<f64>,
    /// Whether disaggregation is enforced.
    ///
    /// NOTE(review): exact semantics are defined by the consumers of this
    /// flag, not visible here — confirm before relying on it.
    pub enforce_disagg: bool,
}

impl RouterConfig {
    /// Creates a configuration from a routing mode and KV-router settings.
    ///
    /// `busy_threshold` starts as `None` and `enforce_disagg` as `false`; chain
    /// [`with_busy_threshold`](Self::with_busy_threshold) and
    /// [`with_enforce_disagg`](Self::with_enforce_disagg) to override them.
    pub fn new(router_mode: RouterMode, kv_router_config: KvRouterConfig) -> Self {
        Self {
            router_mode,
            kv_router_config,
            busy_threshold: None,
            enforce_disagg: false,
        }
    }

    /// Returns the configuration with `busy_threshold` replaced by `threshold`.
    ///
    /// Passing `None` clears a previously set threshold.
    #[must_use = "this consumes the config and returns the updated one"]
    pub fn with_busy_threshold(mut self, threshold: Option<f64>) -> Self {
        self.busy_threshold = threshold;
        self
    }

    /// Returns the configuration with `enforce_disagg` replaced by the given flag.
    #[must_use = "this consumes the config and returns the updated one"]
    pub fn with_enforce_disagg(mut self, enforce_disagg: bool) -> Self {
        self.enforce_disagg = enforce_disagg;
        self
    }
}

49
#[derive(Clone)]
50
pub enum EngineConfig {
51
    /// Remote networked engines that we discover via etcd
52
53
54
55
56
57
58
59
60
61
62
63
    Dynamic(Box<LocalModel>),

    /// A Full service engine does it's own tokenization and prompt formatting.
    StaticFull {
        engine: Arc<dyn StreamingEngine>,
        model: Box<LocalModel>,
    },

    /// A core engine expects to be wrapped with pre/post processors that handle tokenization.
    StaticCore {
        engine: ExecutionContext,
        model: Box<LocalModel>,
64
        is_prefill: bool,
65
66
67
68
69
70
71
72
73
74
75
76
77
    },
}

impl EngineConfig {
    fn local_model(&self) -> &LocalModel {
        use EngineConfig::*;
        match self {
            Dynamic(lm) => lm,
            StaticFull { model, .. } => model,
            StaticCore { model, .. } => model,
        }
    }
}