"deploy/cloud/vscode:/vscode.git/clone" did not exist on "73b0cdb410d1fbb9700fb6e12e0c1525924964f4"
input.rs 4.19 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! This module contains tools to gather a prompt from a user, forward it to an engine and return
//! the response.
//! See the Input enum for the inputs available. Input::Http (OpenAI compatible HTTP server)
//! and Input::Text (interactive chat) are good places to start.
//! The main entry point is `run_input`.

use std::{
    fmt,
    io::{IsTerminal as _, Read as _},
    path::PathBuf,
14
    str::FromStr,
15
16
17
18
};

pub mod batch;
mod common;
19
pub use common::{build_routed_pipeline, build_routed_pipeline_with_preprocessor};
20
pub mod endpoint;
GuanLuo's avatar
GuanLuo committed
21
pub mod grpc;
22
23
24
pub mod http;
pub mod text;

25
use dynamo_runtime::protocols::ENDPOINT_SCHEME;
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45

/// Prefix that marks a batch input string. When parsing, everything after this prefix is
/// taken verbatim as the path stored in `Input::Batch`.
const BATCH_PREFIX: &str = "batch:";

/// The various ways of connecting prompts to an engine.
///
/// Values can be parsed from a string (`FromStr` / `TryFrom<&str>`) and rendered back to
/// text with `Display`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Input {
    /// Run an OpenAI compatible HTTP server
    Http,

    /// Single prompt on stdin
    Stdin,

    /// Interactive chat
    Text,

    /// Pull requests from a namespace/component/endpoint path.
    Endpoint(String),

    /// Batch mode. Run all the prompts, write the outputs, exit.
    Batch(PathBuf),

    /// Run a KServe compatible gRPC server
    Grpc,
}

51
52
53
54
55
56
57
58
impl FromStr for Input {
    type Err = anyhow::Error;

    /// Parses an input selector from text.
    ///
    /// Delegates to the `TryFrom<&str>` implementation, so `"...".parse::<Input>()` and
    /// `Input::try_from("...")` accept exactly the same strings and fail the same way.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        <Self as TryFrom<&str>>::try_from(s)
    }
}

59
60
61
62
63
64
impl TryFrom<&str> for Input {
    type Error = anyhow::Error;

    fn try_from(s: &str) -> anyhow::Result<Self> {
        match s {
            "http" => Ok(Input::Http),
GuanLuo's avatar
GuanLuo committed
65
            "grpc" => Ok(Input::Grpc),
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
            "text" => Ok(Input::Text),
            "stdin" => Ok(Input::Stdin),
            endpoint_path if endpoint_path.starts_with(ENDPOINT_SCHEME) => {
                Ok(Input::Endpoint(endpoint_path.to_string()))
            }
            batch_patch if batch_patch.starts_with(BATCH_PREFIX) => {
                let path = batch_patch.strip_prefix(BATCH_PREFIX).unwrap();
                Ok(Input::Batch(PathBuf::from(path)))
            }
            e => Err(anyhow::anyhow!("Invalid in= option '{e}'")),
        }
    }
}

impl fmt::Display for Input {
    /// Writes the textual form of the input.
    ///
    /// Unit variants print their keyword, `Endpoint` prints the stored string unchanged and
    /// `Batch` prints only the path.
    // NOTE(review): the `Batch` output carries no `batch:` prefix, so it does not parse back
    // into `Input::Batch` — confirm that is intended before relying on round-tripping.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Input::Http => f.write_str("http"),
            Input::Grpc => f.write_str("grpc"),
            Input::Text => f.write_str("text"),
            Input::Stdin => f.write_str("stdin"),
            Input::Endpoint(endpoint) => f.write_str(endpoint),
            Input::Batch(file) => write!(f, "{}", file.display()),
        }
    }
}

impl Default for Input {
    /// Picks the default input from the process's stdin: `Input::Text` when stdin is a
    /// terminal, `Input::Stdin` otherwise.
    fn default() -> Self {
        let interactive = std::io::stdin().is_terminal();
        if !interactive {
            return Input::Stdin;
        }
        Input::Text
    }
}

/// Run the given engine (EngineConfig) connected to an input.
/// Does not return until the input exits.
106
107
/// For Input::Endpoint pass a DistributedRuntime. For everything else pass either a Runtime or a
/// DistributedRuntime.
108
pub async fn run_input(
109
    drt: dynamo_runtime::DistributedRuntime,
110
111
112
    in_opt: Input,
    engine_config: super::EngineConfig,
) -> anyhow::Result<()> {
113
114
115
116
117
118
119
    // Initialize audit bus + sink workers (off hot path; fan-out supported)
    if crate::audit::config::policy().enabled {
        let cap: usize = std::env::var("DYN_AUDIT_CAPACITY")
            .ok()
            .and_then(|v| v.parse().ok())
            .unwrap_or(1024);
        crate::audit::bus::init(cap);
120
        crate::audit::sink::spawn_workers_from_env().await?;
121
        tracing::info!(cap, "Audit initialized");
122
123
    }

124
125
    match in_opt {
        Input::Http => {
126
            http::run(drt, engine_config).await?;
127
        }
GuanLuo's avatar
GuanLuo committed
128
        Input::Grpc => {
129
            grpc::run(drt, engine_config).await?;
GuanLuo's avatar
GuanLuo committed
130
        }
131
        Input::Text => {
132
            text::run(drt, None, engine_config).await?;
133
134
135
136
        }
        Input::Stdin => {
            let mut prompt = String::new();
            std::io::stdin().read_to_string(&mut prompt).unwrap();
137
            text::run(drt, Some(prompt), engine_config).await?;
138
139
        }
        Input::Batch(path) => {
140
            batch::run(drt, path, engine_config).await?;
141
142
        }
        Input::Endpoint(path) => {
143
            endpoint::run(drt, path, engine_config).await?;
144
145
146
147
        }
    }
    Ok(())
}