// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::{fmt, io::IsTerminal as _};

use crate::ENDPOINT_SCHEME;

/// Where requests come from: the front-end used to talk to the engine.
///
/// Values are parsed from the `in=` option string by the `TryFrom<&str>` impl.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Input {
    /// Run an OpenAI compatible HTTP server
    Http,

    /// Single prompt on stdin
    Stdin,

    /// Interactive chat
    Text,

    /// Pull requests from a namespace/component/endpoint path.
    Endpoint(String),

    /// Start the engine but don't provide any way to talk to it.
    /// For multi-node sglang, where the engine connects directly
    /// to the co-ordinator via torch distributed / nccl.
    None,
}

impl TryFrom<&str> for Input {
    type Error = anyhow::Error;

    fn try_from(s: &str) -> anyhow::Result<Self> {
        match s {
            "http" => Ok(Input::Http),
            "text" => Ok(Input::Text),
47
            "stdin" => Ok(Input::Stdin),
48
            "none" => Ok(Input::None),
49
50
51
52
            endpoint_path if endpoint_path.starts_with(ENDPOINT_SCHEME) => {
                let path = endpoint_path.strip_prefix(ENDPOINT_SCHEME).unwrap();
                Ok(Input::Endpoint(path.to_string()))
            }
53
54
55
56
57
58
59
60
61
62
            e => Err(anyhow::anyhow!("Invalid in= option '{e}'")),
        }
    }
}

impl fmt::Display for Input {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let s = match self {
            Input::Http => "http",
            Input::Text => "text",
63
            Input::Stdin => "stdin",
64
            Input::Endpoint(path) => path,
65
            Input::None => "none",
66
67
68
69
70
        };
        write!(f, "{s}")
    }
}

71
72
73
74
75
76
77
78
79
80
impl Default for Input {
    fn default() -> Self {
        if std::io::stdin().is_terminal() {
            Input::Text
        } else {
            Input::Stdin
        }
    }
}

81
82
83
/// Where requests go: the engine or remote endpoint that produces the response.
///
/// Engine variants only exist when the matching cargo feature is enabled.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Output {
    /// Accept un-preprocessed requests, echo the prompt back as the response
    EchoFull,

    /// Accept preprocessed requests, echo the tokens back as the response
    EchoCore,

    /// Publish requests to a namespace/component/endpoint path.
    Endpoint(String),

    /// Run inference on a model in a GGUF file using mistralrs w/ candle
    #[cfg(feature = "mistralrs")]
    MistralRs,

    /// Run inference using sglang
    #[cfg(feature = "sglang")]
    SgLang,

    /// Run inference using llama.cpp
    #[cfg(feature = "llamacpp")]
    LlamaCpp,

    /// Run inference using vllm's engine
    #[cfg(feature = "vllm")]
    Vllm,

    /// Run inference using trtllm
    #[cfg(feature = "trtllm")]
    TrtLLM,

    /// Run inference using a user supplied python file that accepts and returns
    /// strings. It does its own pre-processing.
    #[cfg(feature = "python")]
    PythonStr(String),

    /// Run inference using a user supplied python file that accepts and returns
    /// tokens. We do the pre-processing.
    #[cfg(feature = "python")]
    PythonTok(String),
    //
    // DEVELOPER NOTE
    // If you add an engine add it to `available_engines` below, and to Default if it makes sense
}

impl TryFrom<&str> for Output {
    type Error = anyhow::Error;

    fn try_from(s: &str) -> anyhow::Result<Self> {
        match s {
130
131
132
            #[cfg(feature = "mistralrs")]
            "mistralrs" => Ok(Output::MistralRs),

133
134
135
            #[cfg(feature = "sglang")]
            "sglang" => Ok(Output::SgLang),

136
137
138
            #[cfg(feature = "llamacpp")]
            "llamacpp" | "llama_cpp" => Ok(Output::LlamaCpp),

Graham King's avatar
Graham King committed
139
140
141
            #[cfg(feature = "vllm")]
            "vllm" => Ok(Output::Vllm),

Graham King's avatar
Graham King committed
142
143
144
            #[cfg(feature = "trtllm")]
            "trtllm" => Ok(Output::TrtLLM),

145
            "echo_full" => Ok(Output::EchoFull),
146
            "echo_core" => Ok(Output::EchoCore),
147
148
149
150
151
152

            endpoint_path if endpoint_path.starts_with(ENDPOINT_SCHEME) => {
                let path = endpoint_path.strip_prefix(ENDPOINT_SCHEME).unwrap();
                Ok(Output::Endpoint(path.to_string()))
            }

153
154
155
156
157
158
159
160
            #[cfg(feature = "python")]
            python_str_gen if python_str_gen.starts_with(crate::PYTHON_STR_SCHEME) => {
                let path = python_str_gen
                    .strip_prefix(crate::PYTHON_STR_SCHEME)
                    .unwrap();
                Ok(Output::PythonStr(path.to_string()))
            }

161
162
163
164
165
166
167
168
            #[cfg(feature = "python")]
            python_tok_gen if python_tok_gen.starts_with(crate::PYTHON_TOK_SCHEME) => {
                let path = python_tok_gen
                    .strip_prefix(crate::PYTHON_TOK_SCHEME)
                    .unwrap();
                Ok(Output::PythonTok(path.to_string()))
            }

169
170
171
172
173
174
175
176
            e => Err(anyhow::anyhow!("Invalid out= option '{e}'")),
        }
    }
}

impl fmt::Display for Output {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let s = match self {
177
178
179
            #[cfg(feature = "mistralrs")]
            Output::MistralRs => "mistralrs",

180
181
182
            #[cfg(feature = "sglang")]
            Output::SgLang => "sglang",

183
184
185
            #[cfg(feature = "llamacpp")]
            Output::LlamaCpp => "llamacpp",

Graham King's avatar
Graham King committed
186
187
188
            #[cfg(feature = "vllm")]
            Output::Vllm => "vllm",

Graham King's avatar
Graham King committed
189
190
191
            #[cfg(feature = "trtllm")]
            Output::TrtLLM => "trtllm",

192
            Output::EchoFull => "echo_full",
193
            Output::EchoCore => "echo_core",
194
195

            Output::Endpoint(path) => path,
196
197

            #[cfg(feature = "python")]
198
            Output::PythonStr(_) => "pystr",
199
200

            #[cfg(feature = "python")]
201
            Output::PythonTok(_) => "pytok",
202
203
204
205
        };
        write!(f, "{s}")
    }
}
206

207
208
209
/// Returns the engine to use if user did not say on cmd line.
/// Nearly always defaults to mistralrs which has no dependencies and we include by default.
/// If built with --no-default-features and a specific engine, default to that.
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#[allow(unused_assignments, unused_mut)]
impl Default for Output {
    fn default() -> Self {
        // Default if no engines
        let mut out = Output::EchoFull;

        #[cfg(feature = "llamacpp")]
        {
            out = Output::LlamaCpp;
        }

        #[cfg(feature = "sglang")]
        {
            out = Output::SgLang;
        }

        #[cfg(feature = "vllm")]
        {
            out = Output::Vllm;
        }

231
232
233
234
235
        #[cfg(feature = "mistralrs")]
        {
            out = Output::MistralRs;
        }

236
237
238
        out
    }
}
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277

impl Output {
    /// Lists the display names of the engines compiled into this build.
    ///
    /// The two echo engines are always present; every other entry is added
    /// only when its cargo feature is enabled. Names come from the `Display`
    /// impl of the corresponding variant.
    #[allow(unused_mut)]
    pub fn available_engines() -> Vec<String> {
        let mut engines = vec![String::from("echo_core"), String::from("echo_full")];

        #[cfg(feature = "mistralrs")]
        engines.push(Output::MistralRs.to_string());

        #[cfg(feature = "llamacpp")]
        engines.push(Output::LlamaCpp.to_string());

        #[cfg(feature = "sglang")]
        engines.push(Output::SgLang.to_string());

        #[cfg(feature = "vllm")]
        engines.push(Output::Vllm.to_string());

        // The file name is a placeholder needed only to construct the variants.
        #[cfg(feature = "python")]
        engines.extend([
            Output::PythonStr("file.py".to_string()).to_string(),
            Output::PythonTok("file.py".to_string()).to_string(),
        ]);

        #[cfg(feature = "trtllm")]
        engines.push(Output::TrtLLM.to_string());

        engines
    }
}