// request_formats_test.rs
mod common;

use common::mock_worker::{HealthStatus, MockWorker, MockWorkerConfig, WorkerType};
use reqwest::Client;
use serde_json::json;
6
use sglang_router_rs::config::{RouterConfig, RoutingMode};
7
use sglang_router_rs::core::WorkerManager;
8
9
use sglang_router_rs::routers::{RouterFactory, RouterTrait};
use std::sync::Arc;
10

11
12
/// Test context that manages mock workers
struct TestContext {
13
    workers: Vec<MockWorker>,
14
15
    _router: Arc<dyn RouterTrait>,
    worker_urls: Vec<String>,
16
17
}

18
impl TestContext {
19
    async fn new(worker_configs: Vec<MockWorkerConfig>) -> Self {
20
        let mut config = RouterConfig {
21
            chat_template: None,
22
23
24
            mode: RoutingMode::Regular {
                worker_urls: vec![],
            },
25
            port: 3003,
26
27
            worker_startup_timeout_secs: 1,
            worker_startup_check_interval_secs: 1,
28
            ..Default::default()
29
30
        };

31
32
        let mut workers = Vec::new();
        let mut worker_urls = Vec::new();
33

34
35
36
37
38
39
        for worker_config in worker_configs {
            let mut worker = MockWorker::new(worker_config);
            let url = worker.start().await.unwrap();
            worker_urls.push(url);
            workers.push(worker);
        }
40

41
42
        if !workers.is_empty() {
            tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
43
44
        }

45
46
47
48
49
50
51
52
        config.mode = RoutingMode::Regular {
            worker_urls: worker_urls.clone(),
        };

        let app_context = common::create_test_context(config.clone());

        // Initialize workers in the registry before creating router
        if !worker_urls.is_empty() {
53
            WorkerManager::initialize_workers(&config, &app_context.worker_registry, None)
54
55
56
                .await
                .expect("Failed to initialize workers");
        }
57

58
        let router = RouterFactory::create_router(&app_context).await.unwrap();
59
        let router = Arc::from(router);
60

61
62
63
64
        if !workers.is_empty() {
            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
        }

65
66
67
68
69
        Self {
            workers,
            _router: router,
            worker_urls: worker_urls.clone(),
        }
70
71
72
    }

    async fn shutdown(mut self) {
73
74
75
        // Small delay to ensure any pending operations complete
        tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;

76
77
78
        for worker in &mut self.workers {
            worker.stop().await;
        }
79
80
81
82
83
84
85
86
87
88
89
90

        // Another small delay to ensure cleanup completes
        tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
    }

    async fn make_request(
        &self,
        endpoint: &str,
        body: serde_json::Value,
    ) -> Result<serde_json::Value, String> {
        let client = Client::new();

91
92
93
94
95
        // Use the first worker URL from the context
        let worker_url = self
            .worker_urls
            .first()
            .ok_or_else(|| "No workers available".to_string())?;
96
97

        let response = client
98
            .post(format!("{}{}", worker_url, endpoint))
99
100
101
102
103
104
105
106
107
108
109
110
111
            .json(&body)
            .send()
            .await
            .map_err(|e| format!("Request failed: {}", e))?;

        if !response.status().is_success() {
            return Err(format!("Request failed with status: {}", response.status()));
        }

        response
            .json::<serde_json::Value>()
            .await
            .map_err(|e| format!("Failed to parse response: {}", e))
112
113
114
115
    }
}

#[cfg(test)]
mod request_format_tests {
    use super::*;

    /// Sends `payload` to `endpoint` via `TestContext::make_request` and
    /// returns the parsed response.
    ///
    /// Panics with the endpoint and the underlying error message when the
    /// request fails, so a failing test reports why instead of only that
    /// `is_ok()` was false.
    async fn request_ok(
        ctx: &TestContext,
        endpoint: &str,
        payload: serde_json::Value,
    ) -> serde_json::Value {
        ctx.make_request(endpoint, payload)
            .await
            .unwrap_or_else(|e| panic!("POST {} failed: {}", endpoint, e))
    }

    /// `/generate` accepts plain text, text with sampling parameters, and
    /// pre-tokenized `input_ids`.
    #[tokio::test]
    async fn test_generate_request_formats() {
        let ctx = TestContext::new(vec![MockWorkerConfig {
            port: 19001,
            worker_type: WorkerType::Regular,
            health_status: HealthStatus::Healthy,
            response_delay_ms: 0,
            fail_rate: 0.0,
        }])
        .await;

        // Plain text prompt.
        let payload = json!({
            "text": "Hello, world!",
            "stream": false
        });
        request_ok(&ctx, "/generate", payload).await;

        // Text prompt with sampling parameters.
        let payload = json!({
            "text": "Tell me a story",
            "sampling_params": {
                "temperature": 0.7,
                "max_new_tokens": 100,
                "top_p": 0.9
            },
            "stream": false
        });
        request_ok(&ctx, "/generate", payload).await;

        // Token ids instead of text.
        let payload = json!({
            "input_ids": [1, 2, 3, 4, 5],
            "sampling_params": {
                "temperature": 0.0,
                "max_new_tokens": 50
            },
            "stream": false
        });
        request_ok(&ctx, "/generate", payload).await;

        ctx.shutdown().await;
    }

    /// `/v1/chat/completions` returns a `chat.completion` object with an id
    /// and choices, with and without sampling parameters in the request.
    #[tokio::test]
    async fn test_v1_chat_completions_formats() {
        let ctx = TestContext::new(vec![MockWorkerConfig {
            port: 19002,
            worker_type: WorkerType::Regular,
            health_status: HealthStatus::Healthy,
            response_delay_ms: 0,
            fail_rate: 0.0,
        }])
        .await;

        // System + user messages.
        let payload = json!({
            "model": "test-model",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Hello!"}
            ],
            "stream": false
        });

        let response = request_ok(&ctx, "/v1/chat/completions", payload).await;
        assert!(response.get("choices").is_some());
        assert!(response.get("id").is_some());
        assert_eq!(
            response.get("object").and_then(|v| v.as_str()),
            Some("chat.completion")
        );

        // Single user message with sampling parameters.
        let payload = json!({
            "model": "test-model",
            "messages": [
                {"role": "user", "content": "Tell me a joke"}
            ],
            "temperature": 0.8,
            "max_tokens": 150,
            "top_p": 0.95,
            "stream": false
        });
        request_ok(&ctx, "/v1/chat/completions", payload).await;

        ctx.shutdown().await;
    }

    /// `/v1/completions` returns a `text_completion` object and accepts a
    /// single prompt, a list of prompts, and a `logprobs` request.
    #[tokio::test]
    async fn test_v1_completions_formats() {
        let ctx = TestContext::new(vec![MockWorkerConfig {
            port: 19003,
            worker_type: WorkerType::Regular,
            health_status: HealthStatus::Healthy,
            response_delay_ms: 0,
            fail_rate: 0.0,
        }])
        .await;

        // Single string prompt.
        let payload = json!({
            "model": "test-model",
            "prompt": "Once upon a time",
            "max_tokens": 50,
            "stream": false
        });

        let response = request_ok(&ctx, "/v1/completions", payload).await;
        assert!(response.get("choices").is_some());
        assert_eq!(
            response.get("object").and_then(|v| v.as_str()),
            Some("text_completion")
        );

        // Array of prompts.
        let payload = json!({
            "model": "test-model",
            "prompt": ["First prompt", "Second prompt"],
            "temperature": 0.5,
            "stream": false
        });
        request_ok(&ctx, "/v1/completions", payload).await;

        // Prompt with logprobs requested.
        let payload = json!({
            "model": "test-model",
            "prompt": "The capital of France is",
            "max_tokens": 10,
            "logprobs": 5,
            "stream": false
        });
        request_ok(&ctx, "/v1/completions", payload).await;

        ctx.shutdown().await;
    }

    /// `/generate` accepts batched text and batched `input_ids`.
    #[tokio::test]
    async fn test_batch_requests() {
        let ctx = TestContext::new(vec![MockWorkerConfig {
            port: 19004,
            worker_type: WorkerType::Regular,
            health_status: HealthStatus::Healthy,
            response_delay_ms: 0,
            fail_rate: 0.0,
        }])
        .await;

        // Batch of text prompts.
        let payload = json!({
            "text": ["First text", "Second text", "Third text"],
            "sampling_params": {
                "temperature": 0.7,
                "max_new_tokens": 50
            },
            "stream": false
        });
        request_ok(&ctx, "/generate", payload).await;

        // Batch of token-id sequences.
        let payload = json!({
            "input_ids": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
            "stream": false
        });
        request_ok(&ctx, "/generate", payload).await;

        ctx.shutdown().await;
    }

    /// `/generate` accepts `return_logprob`, a `json_schema` sampling
    /// parameter, and `ignore_eos`.
    #[tokio::test]
    async fn test_special_parameters() {
        let ctx = TestContext::new(vec![MockWorkerConfig {
            port: 19005,
            worker_type: WorkerType::Regular,
            health_status: HealthStatus::Healthy,
            response_delay_ms: 0,
            fail_rate: 0.0,
        }])
        .await;

        let payload = json!({
            "text": "Test",
            "return_logprob": true,
            "stream": false
        });
        request_ok(&ctx, "/generate", payload).await;

        let payload = json!({
            "text": "Generate JSON",
            "sampling_params": {
                "temperature": 0.0,
                "json_schema": "$$ANY$$"
            },
            "stream": false
        });
        request_ok(&ctx, "/generate", payload).await;

        let payload = json!({
            "text": "Continue forever",
            "sampling_params": {
                "temperature": 0.7,
                "max_new_tokens": 100,
                "ignore_eos": true
            },
            "stream": false
        });
        request_ok(&ctx, "/generate", payload).await;

        ctx.shutdown().await;
    }

    /// An empty JSON object sent to `/generate` is expected to succeed.
    #[tokio::test]
    async fn test_error_handling() {
        let ctx = TestContext::new(vec![MockWorkerConfig {
            port: 19006,
            worker_type: WorkerType::Regular,
            health_status: HealthStatus::Healthy,
            response_delay_ms: 0,
            fail_rate: 0.0,
        }])
        .await;

        let payload = json!({});

        // Mock worker accepts empty body
        request_ok(&ctx, "/generate", payload).await;

        ctx.shutdown().await;
    }
}