lib.rs 5.18 KB
Newer Older
1
/// Text Generation Inference Webserver
2
mod infer;
3
mod queue;
Olivier Dehaene's avatar
Olivier Dehaene committed
4
pub mod server;
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
5
mod validation;
Olivier Dehaene's avatar
Olivier Dehaene committed
6

7
use infer::Infer;
8
use queue::{Entry, Queue};
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
9
use serde::{Deserialize, Serialize};
10
use utoipa::ToSchema;
Olivier Dehaene's avatar
Olivier Dehaene committed
11
use validation::Validation;
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
12

13
#[derive(Clone, Debug, Deserialize, ToSchema)]
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
14
pub(crate) struct GenerateParameters {
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
        nullable = true,
        default = "null",
        example = 0.5
    )]
    pub temperature: Option<f32>,
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
        nullable = true,
        default = "null",
        example = 1.03
    )]
    pub repetition_penalty: Option<f32>,
    #[serde(default)]
    #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)]
    pub top_k: Option<i32>,
    #[serde(default)]
    #[schema(
        exclusive_minimum = 0.0,
        maximum = 1.0,
        nullable = true,
        default = "null",
        example = 0.95
    )]
    pub top_p: Option<f32>,
43
    #[serde(default)]
44
45
46
47
48
49
50
51
52
    #[schema(
        exclusive_minimum = 0.0,
        maximum = 1.0,
        nullable = true,
        default = "null",
        example = 0.95
    )]
    pub typical_p: Option<f32>,
    #[serde(default)]
53
    #[schema(default = "false", example = true)]
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
54
55
    pub do_sample: bool,
    #[serde(default = "default_max_new_tokens")]
56
    #[schema(exclusive_minimum = 0, exclusive_maximum = 512, default = "20")]
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
57
    pub max_new_tokens: u32,
OlivierDehaene's avatar
OlivierDehaene committed
58
    #[serde(default)]
59
    #[schema(default = "null", example = false)]
60
61
    pub return_full_text: Option<bool>,
    #[serde(default)]
62
    #[schema(inline, max_items = 4, example = json ! (["photographer"]))]
63
    pub stop: Vec<String>,
OlivierDehaene's avatar
OlivierDehaene committed
64
    #[serde(default)]
65
66
67
    #[schema(default = "null", example = "null")]
    pub truncate: Option<usize>,
    #[serde(default)]
68
69
70
    #[schema(default = "false", example = true)]
    pub watermark: bool,
    #[serde(default)]
71
    #[schema(default = "true")]
OlivierDehaene's avatar
OlivierDehaene committed
72
    pub details: bool,
73
74
    #[serde(default)]
    pub seed: Option<u64>,
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
75
76
77
78
79
80
81
82
}

/// Value used for `max_new_tokens` when a request does not provide one.
///
/// Referenced by name from the `#[serde(default = "default_max_new_tokens")]`
/// attribute, so the function name and signature must stay as they are.
fn default_max_new_tokens() -> u32 {
    const DEFAULT_MAX_NEW_TOKENS: u32 = 20;
    DEFAULT_MAX_NEW_TOKENS
}

fn default_parameters() -> GenerateParameters {
    GenerateParameters {
83
84
85
86
        temperature: None,
        repetition_penalty: None,
        top_k: None,
        top_p: None,
87
        typical_p: None,
88
        do_sample: false,
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
89
        max_new_tokens: default_max_new_tokens(),
90
        return_full_text: None,
91
        stop: Vec::new(),
92
        truncate: None,
93
        watermark: false,
OlivierDehaene's avatar
OlivierDehaene committed
94
        details: false,
95
        seed: None,
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
96
97
98
    }
}

99
#[derive(Clone, Debug, Deserialize, ToSchema)]
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
100
pub(crate) struct GenerateRequest {
101
    #[schema(example = "My name is Olivier and I")]
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
102
103
104
105
106
    pub inputs: String,
    #[serde(default = "default_parameters")]
    pub parameters: GenerateParameters,
}

107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// Request body with the same `inputs` / `parameters` fields as `GenerateRequest`,
// plus a `stream` flag.
#[derive(Clone, Debug, Deserialize, ToSchema)]
pub(crate) struct CompatGenerateRequest {
    #[schema(example = "My name is Olivier and I")]
    pub inputs: String,
    // Filled in by `default_parameters()` when the client omits the field.
    #[serde(default = "default_parameters")]
    pub parameters: GenerateParameters,
    // Defaults to `false` when omitted. Nothing in this file reads the flag: the
    // `From<CompatGenerateRequest> for GenerateRequest` conversion drops it, hence
    // the `dead_code` allowance.
    // NOTE(review): presumably consumed by the server module — confirm.
    #[serde(default)]
    #[allow(dead_code)]
    pub stream: bool,
}

impl From<CompatGenerateRequest> for GenerateRequest {
    fn from(req: CompatGenerateRequest) -> Self {
        Self {
            inputs: req.inputs,
            parameters: req.parameters,
        }
    }
}

127
128
129
130
131
132
// One token reported in the `prefill` list of `Details`: its id, its text and a
// log-probability.
#[derive(Debug, Serialize, ToSchema)]
pub struct PrefillToken {
    #[schema(example = 0)]
    id: u32,
    #[schema(example = "test")]
    text: String,
    // Documented as nullable although the Rust type is a plain `f32`.
    // NOTE(review): presumably because a non-finite value is emitted as JSON
    // `null` by the serializer — confirm.
    #[schema(nullable = true, example = -0.34)]
    logprob: f32,
}

137
138
139
140
141
142
// One token as reported in responses: its id, its text, a log-probability and
// a `special` flag.
#[derive(Debug, Serialize, ToSchema)]
pub struct Token {
    #[schema(example = 0)]
    id: u32,
    #[schema(example = "test")]
    text: String,
    // Documented as nullable although the Rust type is a plain `f32`.
    // NOTE(review): presumably because a non-finite value is emitted as JSON
    // `null` by the serializer — confirm.
    #[schema(nullable = true, example = -0.34)]
    logprob: f32,
    // Boolean literal (not the string "false") so the example has the same JSON
    // type as the field, like the other boolean fields in this file.
    #[schema(example = false)]
    special: bool,
}

// Reason reported in the `finish_reason` field of `Details` and `StreamDetails`.
//
// Serialized names: `rename_all = "snake_case"` gives "length" and
// "stop_sequence"; `EndOfSequenceToken` is explicitly renamed to "eos_token".
// The `#[schema(rename = ...)]` attributes repeat the same names for the
// generated OpenAPI schema.
#[derive(Serialize, ToSchema)]
#[serde(rename_all(serialize = "snake_case"))]
pub(crate) enum FinishReason {
    #[schema(rename = "length")]
    Length,
    // Explicit rename: snake_case alone would give "end_of_sequence_token".
    #[serde(rename = "eos_token")]
    #[schema(rename = "eos_token")]
    EndOfSequenceToken,
    #[schema(rename = "stop_sequence")]
    StopSequence,
}
160

161
#[derive(Serialize, ToSchema)]
OlivierDehaene's avatar
OlivierDehaene committed
162
pub(crate) struct Details {
163
164
165
    #[schema(example = "length")]
    pub finish_reason: FinishReason,
    #[schema(example = 1)]
OlivierDehaene's avatar
OlivierDehaene committed
166
    pub generated_tokens: u32,
167
    #[schema(example = 42)]
168
    pub seed: Option<u64>,
169
170
    pub prefill: Vec<PrefillToken>,
    pub tokens: Vec<Token>,
OlivierDehaene's avatar
OlivierDehaene committed
171
172
}

173
#[derive(Serialize, ToSchema)]
174
pub(crate) struct GenerateResponse {
175
    #[schema(example = "test")]
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
176
    pub generated_text: String,
OlivierDehaene's avatar
OlivierDehaene committed
177
178
    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<Details>,
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
179
}
180

181
182
183
184
185
186
187
188
189
190
191
// Details attached to a `StreamResponse`: the same `finish_reason`,
// `generated_tokens` and `seed` fields as `Details`, without the token lists.
#[derive(Serialize, ToSchema)]
pub(crate) struct StreamDetails {
    #[schema(example = "length")]
    pub finish_reason: FinishReason,
    #[schema(example = 1)]
    pub generated_tokens: u32,
    // No `skip_serializing_if`, so `None` is written out as `null`.
    #[schema(example = 42)]
    pub seed: Option<u64>,
}

#[derive(Serialize, ToSchema)]
192
193
pub(crate) struct StreamResponse {
    pub token: Token,
194
    #[schema(nullable = true, default = "null", example = "test")]
195
    pub generated_text: Option<String>,
196
197
    #[schema(nullable = true, default = "null")]
    pub details: Option<StreamDetails>,
198
199
}

200
#[derive(Serialize, ToSchema)]
201
202
pub(crate) struct ErrorResponse {
    pub error: String,
203
    pub error_type: String,
204
}