opt.rs 6.57 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

16
use std::{fmt, io::IsTerminal as _};
17

18
19
use crate::ENDPOINT_SCHEME;

20
/// Where requests come from.
///
/// Parsed from the command line `in=` option via `TryFrom<&str>` and
/// rendered back with `fmt::Display`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Input {
    /// Run an OpenAI compatible HTTP server
    Http,

    /// Single prompt on stdin
    Stdin,

    /// Interactive chat
    Text,

    /// Pull requests from a namespace/component/endpoint path.
    Endpoint(String),

    /// Start the engine but don't provide any way to talk to it.
    /// For multi-node sglang, where the engine connects directly
    /// to the co-ordinator via torch distributed / nccl.
    None,
}

impl TryFrom<&str> for Input {
    type Error = anyhow::Error;

    fn try_from(s: &str) -> anyhow::Result<Self> {
        match s {
            "http" => Ok(Input::Http),
            "text" => Ok(Input::Text),
47
            "stdin" => Ok(Input::Stdin),
48
            "none" => Ok(Input::None),
49
50
51
52
            endpoint_path if endpoint_path.starts_with(ENDPOINT_SCHEME) => {
                let path = endpoint_path.strip_prefix(ENDPOINT_SCHEME).unwrap();
                Ok(Input::Endpoint(path.to_string()))
            }
53
54
55
56
57
58
59
60
61
62
            e => Err(anyhow::anyhow!("Invalid in= option '{e}'")),
        }
    }
}

impl fmt::Display for Input {
    /// Render the value in its short command-line form; an `Endpoint`
    /// prints its bare path (without the scheme prefix).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Input::Http => f.write_str("http"),
            Input::Text => f.write_str("text"),
            Input::Stdin => f.write_str("stdin"),
            Input::Endpoint(path) => f.write_str(path),
            Input::None => f.write_str("none"),
        }
    }
}

71
72
73
74
75
76
77
78
79
80
impl Default for Input {
    /// Pick an input mode from how stdin is connected: a terminal means a
    /// human is driving, so start interactive chat; otherwise stdin is a
    /// pipe/file, so read a single prompt from it.
    fn default() -> Self {
        let interactive = std::io::stdin().is_terminal();
        if interactive {
            Input::Text
        } else {
            Input::Stdin
        }
    }
}

81
82
83
/// Which engine handles the requests.
///
/// Engine variants only exist when their cargo feature is enabled.
/// Parsed from the command line `out=` option via `TryFrom<&str>` and
/// rendered back with `fmt::Display`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Output {
    /// Accept un-preprocessed requests, echo the prompt back as the response
    EchoFull,

    /// Accept preprocessed requests, echo the tokens back as the response
    EchoCore,

    /// Publish requests to a namespace/component/endpoint path.
    Endpoint(String),

    #[cfg(feature = "mistralrs")]
    /// Run inference on a model in a GGUF file using mistralrs w/ candle
    MistralRs,

    #[cfg(feature = "sglang")]
    /// Run inference using sglang
    SgLang,

    #[cfg(feature = "llamacpp")]
    /// Run inference using llama.cpp
    LlamaCpp,

    #[cfg(feature = "vllm")]
    /// Run inference using vllm's engine
    Vllm,

    #[cfg(feature = "trtllm")]
    /// Run inference using trtllm
    TrtLLM,

    /// Run inference using a user supplied python file that accepts and returns
    /// strings. It does it's own pre-processing.
    #[cfg(feature = "python")]
    PythonStr(String),

    /// Run inference using a user supplied python file that accepts and returns
    /// tokens. We do the pre-processing.
    #[cfg(feature = "python")]
    PythonTok(String),
}

impl TryFrom<&str> for Output {
    type Error = anyhow::Error;

    fn try_from(s: &str) -> anyhow::Result<Self> {
        match s {
127
128
129
            #[cfg(feature = "mistralrs")]
            "mistralrs" => Ok(Output::MistralRs),

130
131
132
            #[cfg(feature = "sglang")]
            "sglang" => Ok(Output::SgLang),

133
134
135
            #[cfg(feature = "llamacpp")]
            "llamacpp" | "llama_cpp" => Ok(Output::LlamaCpp),

Graham King's avatar
Graham King committed
136
137
138
            #[cfg(feature = "vllm")]
            "vllm" => Ok(Output::Vllm),

Graham King's avatar
Graham King committed
139
140
141
            #[cfg(feature = "trtllm")]
            "trtllm" => Ok(Output::TrtLLM),

142
            "echo_full" => Ok(Output::EchoFull),
143
            "echo_core" => Ok(Output::EchoCore),
144
145
146
147
148
149

            endpoint_path if endpoint_path.starts_with(ENDPOINT_SCHEME) => {
                let path = endpoint_path.strip_prefix(ENDPOINT_SCHEME).unwrap();
                Ok(Output::Endpoint(path.to_string()))
            }

150
151
152
153
154
155
156
157
            #[cfg(feature = "python")]
            python_str_gen if python_str_gen.starts_with(crate::PYTHON_STR_SCHEME) => {
                let path = python_str_gen
                    .strip_prefix(crate::PYTHON_STR_SCHEME)
                    .unwrap();
                Ok(Output::PythonStr(path.to_string()))
            }

158
159
160
161
162
163
164
165
            #[cfg(feature = "python")]
            python_tok_gen if python_tok_gen.starts_with(crate::PYTHON_TOK_SCHEME) => {
                let path = python_tok_gen
                    .strip_prefix(crate::PYTHON_TOK_SCHEME)
                    .unwrap();
                Ok(Output::PythonTok(path.to_string()))
            }

166
167
168
169
170
171
172
173
            e => Err(anyhow::anyhow!("Invalid out= option '{e}'")),
        }
    }
}

impl fmt::Display for Output {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let s = match self {
174
175
176
            #[cfg(feature = "mistralrs")]
            Output::MistralRs => "mistralrs",

177
178
179
            #[cfg(feature = "sglang")]
            Output::SgLang => "sglang",

180
181
182
            #[cfg(feature = "llamacpp")]
            Output::LlamaCpp => "llamacpp",

Graham King's avatar
Graham King committed
183
184
185
            #[cfg(feature = "vllm")]
            Output::Vllm => "vllm",

Graham King's avatar
Graham King committed
186
187
188
            #[cfg(feature = "trtllm")]
            Output::TrtLLM => "trtllm",

189
            Output::EchoFull => "echo_full",
190
            Output::EchoCore => "echo_core",
191
192

            Output::Endpoint(path) => path,
193
194
195

            #[cfg(feature = "python")]
            Output::PythonStr(path) => path,
196
197
198

            #[cfg(feature = "python")]
            Output::PythonTok(path) => path,
199
200
201
202
        };
        write!(f, "{s}")
    }
}
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235

/// Returns the engine to use if user did not say on cmd line
/// Uses whatever was compiled in, with a priority ordering.
// Each cfg block below overwrites `out`, so the LAST enabled block wins:
// vllm > sglang > llamacpp > mistralrs > echo. With zero or one engine
// feature enabled, the earlier assignments are dead stores — hence the
// allow(unused_assignments, unused_mut).
#[allow(unused_assignments, unused_mut)]
impl Default for Output {
    fn default() -> Self {
        // Default if no engines
        let mut out = Output::EchoFull;

        // Runs everywhere but needs local CUDA to build
        #[cfg(feature = "mistralrs")]
        {
            out = Output::MistralRs;
        }

        #[cfg(feature = "llamacpp")]
        {
            out = Output::LlamaCpp;
        }

        #[cfg(feature = "sglang")]
        {
            out = Output::SgLang;
        }

        #[cfg(feature = "vllm")]
        {
            out = Output::Vllm;
        }

        out
    }
}