"pcdet/datasets/vscode:/vscode.git/clone" did not exist on "edb82b9f7ec575aebd2e8d9c704ca911d6197f9d"
opt.rs 8.07 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::{fmt, io::IsTerminal as _, path::PathBuf};

use crate::ENDPOINT_SCHEME;
const BATCH_PREFIX: &str = "batch:";

22
#[derive(PartialEq)]
23
24
25
26
pub enum Input {
    /// Run an OpenAI compatible HTTP server
    Http,

27
28
29
30
    /// Single prompt on stdin
    Stdin,

    /// Interactive chat
31
    Text,
32
33
34

    /// Pull requests from a namespace/component/endpoint path.
    Endpoint(String),
35

36
37
38
    /// Batch mode. Run all the prompts, write the outputs, exit.
    Batch(PathBuf),

39
40
41
42
    /// Start the engine but don't provide any way to talk to it.
    /// For multi-node sglang, where the engine connects directly
    /// to the co-ordinator via torch distributed / nccl.
    None,
43
44
45
46
47
48
49
50
51
}

impl TryFrom<&str> for Input {
    type Error = anyhow::Error;

    fn try_from(s: &str) -> anyhow::Result<Self> {
        match s {
            "http" => Ok(Input::Http),
            "text" => Ok(Input::Text),
52
            "stdin" => Ok(Input::Stdin),
53
            "none" => Ok(Input::None),
54
55
56
57
            endpoint_path if endpoint_path.starts_with(ENDPOINT_SCHEME) => {
                let path = endpoint_path.strip_prefix(ENDPOINT_SCHEME).unwrap();
                Ok(Input::Endpoint(path.to_string()))
            }
58
59
60
61
            batch_patch if batch_patch.starts_with(BATCH_PREFIX) => {
                let path = batch_patch.strip_prefix(BATCH_PREFIX).unwrap();
                Ok(Input::Batch(PathBuf::from(path)))
            }
62
63
64
65
66
67
68
69
70
71
            e => Err(anyhow::anyhow!("Invalid in= option '{e}'")),
        }
    }
}

impl fmt::Display for Input {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let s = match self {
            Input::Http => "http",
            Input::Text => "text",
72
            Input::Stdin => "stdin",
73
            Input::Endpoint(path) => path,
74
            Input::Batch(path) => &path.display().to_string(),
75
            Input::None => "none",
76
77
78
79
80
        };
        write!(f, "{s}")
    }
}

impl Default for Input {
    fn default() -> Self {
        if std::io::stdin().is_terminal() {
            Input::Text
        } else {
            Input::Stdin
        }
    }
}

pub enum Output {
    /// Accept un-preprocessed requests, echo the prompt back as the response
    EchoFull,
94

95
96
97
    /// Accept preprocessed requests, echo the tokens back as the response
    EchoCore,

98
99
100
    /// Publish requests to a namespace/component/endpoint path.
    Endpoint(String),

101
102
103
    #[cfg(feature = "mistralrs")]
    /// Run inference on a model in a GGUF file using mistralrs w/ candle
    MistralRs,
104
105
106
107

    #[cfg(feature = "sglang")]
    /// Run inference using sglang
    SgLang,
108
109
110
111

    #[cfg(feature = "llamacpp")]
    /// Run inference using llama.cpp
    LlamaCpp,
Graham King's avatar
Graham King committed
112
113
114
115

    #[cfg(feature = "vllm")]
    /// Run inference using vllm's engine
    Vllm,
Graham King's avatar
Graham King committed
116
117
118
119

    #[cfg(feature = "trtllm")]
    /// Run inference using trtllm
    TrtLLM,
120

121
122
    /// Run inference using a user supplied python file that accepts and returns
    /// strings. It does it's own pre-processing.
123
124
    #[cfg(feature = "python")]
    PythonStr(String),
125
126
127
128
129

    /// Run inference using a user supplied python file that accepts and returns
    /// tokens. We do the pre-processing.
    #[cfg(feature = "python")]
    PythonTok(String),
130
131
132
    //
    // DEVELOPER NOTE
    // If you add an engine add it to `available_engines` below, and to Default if it makes sense
133
134
135
136
137
138
139
}

impl TryFrom<&str> for Output {
    type Error = anyhow::Error;

    fn try_from(s: &str) -> anyhow::Result<Self> {
        match s {
140
141
142
            #[cfg(feature = "mistralrs")]
            "mistralrs" => Ok(Output::MistralRs),

143
144
145
            #[cfg(feature = "sglang")]
            "sglang" => Ok(Output::SgLang),

146
147
148
            #[cfg(feature = "llamacpp")]
            "llamacpp" | "llama_cpp" => Ok(Output::LlamaCpp),

Graham King's avatar
Graham King committed
149
150
151
            #[cfg(feature = "vllm")]
            "vllm" => Ok(Output::Vllm),

Graham King's avatar
Graham King committed
152
153
154
            #[cfg(feature = "trtllm")]
            "trtllm" => Ok(Output::TrtLLM),

155
            "echo_full" => Ok(Output::EchoFull),
156
            "echo_core" => Ok(Output::EchoCore),
157
158
159
160
161
162

            endpoint_path if endpoint_path.starts_with(ENDPOINT_SCHEME) => {
                let path = endpoint_path.strip_prefix(ENDPOINT_SCHEME).unwrap();
                Ok(Output::Endpoint(path.to_string()))
            }

163
164
165
166
167
168
169
170
            #[cfg(feature = "python")]
            python_str_gen if python_str_gen.starts_with(crate::PYTHON_STR_SCHEME) => {
                let path = python_str_gen
                    .strip_prefix(crate::PYTHON_STR_SCHEME)
                    .unwrap();
                Ok(Output::PythonStr(path.to_string()))
            }

171
172
173
174
175
176
177
178
            #[cfg(feature = "python")]
            python_tok_gen if python_tok_gen.starts_with(crate::PYTHON_TOK_SCHEME) => {
                let path = python_tok_gen
                    .strip_prefix(crate::PYTHON_TOK_SCHEME)
                    .unwrap();
                Ok(Output::PythonTok(path.to_string()))
            }

179
180
181
182
183
184
185
186
            e => Err(anyhow::anyhow!("Invalid out= option '{e}'")),
        }
    }
}

impl fmt::Display for Output {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let s = match self {
187
188
189
            #[cfg(feature = "mistralrs")]
            Output::MistralRs => "mistralrs",

190
191
192
            #[cfg(feature = "sglang")]
            Output::SgLang => "sglang",

193
194
195
            #[cfg(feature = "llamacpp")]
            Output::LlamaCpp => "llamacpp",

Graham King's avatar
Graham King committed
196
197
198
            #[cfg(feature = "vllm")]
            Output::Vllm => "vllm",

Graham King's avatar
Graham King committed
199
200
201
            #[cfg(feature = "trtllm")]
            Output::TrtLLM => "trtllm",

202
            Output::EchoFull => "echo_full",
203
            Output::EchoCore => "echo_core",
204
205

            Output::Endpoint(path) => path,
206
207

            #[cfg(feature = "python")]
208
            Output::PythonStr(_) => "pystr",
209
210

            #[cfg(feature = "python")]
211
            Output::PythonTok(_) => "pytok",
212
213
214
215
        };
        write!(f, "{s}")
    }
}
/// Returns the engine to use if user did not say on cmd line.
/// Nearly always defaults to mistralrs which has no dependencies and we include by default.
/// If built with --no-default-features and a specific engine, default to that.
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#[allow(unused_assignments, unused_mut)]
impl Default for Output {
    fn default() -> Self {
        // Default if no engines
        let mut out = Output::EchoFull;

        #[cfg(feature = "llamacpp")]
        {
            out = Output::LlamaCpp;
        }

        #[cfg(feature = "sglang")]
        {
            out = Output::SgLang;
        }

        #[cfg(feature = "vllm")]
        {
            out = Output::Vllm;
        }

241
242
243
244
245
        #[cfg(feature = "mistralrs")]
        {
            out = Output::MistralRs;
        }

246
247
248
        out
    }
}

impl Output {
    #[allow(unused_mut)]
    pub fn available_engines() -> Vec<String> {
        let mut out = vec!["echo_core".to_string(), "echo_full".to_string()];
        #[cfg(feature = "mistralrs")]
        {
            out.push(Output::MistralRs.to_string());
        }

        #[cfg(feature = "llamacpp")]
        {
            out.push(Output::LlamaCpp.to_string());
        }

        #[cfg(feature = "sglang")]
        {
            out.push(Output::SgLang.to_string());
        }

        #[cfg(feature = "vllm")]
        {
            out.push(Output::Vllm.to_string());
        }

        #[cfg(feature = "python")]
        {
            out.push(Output::PythonStr("file.py".to_string()).to_string());
            out.push(Output::PythonTok("file.py".to_string()).to_string());
        }

        #[cfg(feature = "trtllm")]
        {
            out.push(Output::TrtLLM.to_string());
        }

        out
    }
}