queue.rs 26.3 KB
Newer Older
Nicolas Patry's avatar
Nicolas Patry committed
1
2
3
4
use crate::block_allocator::{BlockAllocation, BlockAllocator};
use crate::client;
use crate::client::{
    Batch, GrammarType, NextTokenChooserParameters, Request, StoppingCriteriaParameters,
OlivierDehaene's avatar
OlivierDehaene committed
5
};
6
use nohash_hasher::{BuildNoHashHasher, IntMap};
7
use std::cmp::{max, min};
8
use std::collections::VecDeque;
Nicolas Patry's avatar
Nicolas Patry committed
9
10
11
12
13
use text_generation_router::infer::InferError;
use text_generation_router::infer::InferStreamResponse;
use text_generation_router::validation::{
    Chunk, ChunksToString, ValidGenerateRequest, ValidGrammar, ValidParameters,
    ValidStoppingParameters,
OlivierDehaene's avatar
OlivierDehaene committed
14
};
OlivierDehaene's avatar
OlivierDehaene committed
15
use tokio::sync::{mpsc, oneshot};
16
use tokio::time::Instant;
17
use tracing::{info_span, instrument, Instrument, Span};
18
19
20
21
22
23
24

/// Queue entry
#[derive(Debug)]
pub(crate) struct Entry {
    /// Request
    pub request: ValidGenerateRequest,
    /// Response sender to communicate between the Infer struct and the batching_task
OlivierDehaene's avatar
OlivierDehaene committed
25
    pub response_tx: mpsc::UnboundedSender<Result<InferStreamResponse, InferError>>,
26
27
28
29
30
31
    /// Span that will live as long as entry
    pub span: Span,
    /// Temporary span used as a guard when logging inference, wait times...
    pub temp_span: Option<Span>,
    /// Instant when this entry was queued
    pub queue_time: Instant,
32
33
    /// Instant when this entry was added to a batch
    pub batch_time: Option<Instant>,
34
35
    /// Block Allocation
    pub block_allocation: Option<BlockAllocation>,
36
37
38
39
40
41
}

/// Request Queue
#[derive(Debug, Clone)]
pub(crate) struct Queue {
    /// Channel to communicate with the background queue task
OlivierDehaene's avatar
OlivierDehaene committed
42
    queue_sender: mpsc::UnboundedSender<QueueCommand>,
43
44
45
}

impl Queue {
Nicolas Patry's avatar
Nicolas Patry committed
46
47
48
    pub(crate) fn new(
        requires_padding: bool,
        block_size: u32,
49
        prefix_caching: bool,
Nicolas Patry's avatar
Nicolas Patry committed
50
51
        window_size: Option<u32>,
        speculate: u32,
52
        max_batch_total_tokens: u32,
Nicolas Patry's avatar
Nicolas Patry committed
53
    ) -> Self {
54
        // Create channel
OlivierDehaene's avatar
OlivierDehaene committed
55
        let (queue_sender, queue_receiver) = mpsc::unbounded_channel();
56
57

        // Launch background queue task
58
59
60
        tokio::spawn(queue_task(
            requires_padding,
            block_size,
61
            prefix_caching,
62
            window_size,
Nicolas Patry's avatar
Nicolas Patry committed
63
            speculate,
64
            max_batch_total_tokens,
65
66
            queue_receiver,
        ));
67
68
69
70

        Self { queue_sender }
    }

71
    /// Append an entry to the queue
72
    #[instrument(skip_all)]
73
74
75
    pub(crate) fn append(&self, entry: Entry) {
        // Send append command to the background task managing the state
        // Unwrap is safe here
76
        self.queue_sender
77
            .send(QueueCommand::Append(Box::new(entry), Span::current()))
78
            .unwrap();
79
80
81
    }

    // Get the next batch
82
    #[instrument(skip(self))]
83
84
85
    pub(crate) async fn next_batch(
        &self,
        min_size: Option<usize>,
86
        max_size: Option<usize>,
87
        prefill_token_budget: u32,
88
        token_budget: u32,
89
90
91
92
93
94
95
96
    ) -> Option<NextBatch> {
        // Create response channel
        let (response_sender, response_receiver) = oneshot::channel();
        // Send next batch command to the background task managing the state
        // Unwrap is safe here
        self.queue_sender
            .send(QueueCommand::NextBatch {
                min_size,
97
                max_size,
98
                prefill_token_budget,
99
                token_budget,
100
                response_sender,
101
                span: Span::current(),
102
103
104
105
106
107
108
109
110
            })
            .unwrap();
        // Await on response channel
        // Unwrap is safe here
        response_receiver.await.unwrap()
    }
}

// Background task responsible of the queue state
111
112
113
async fn queue_task(
    requires_padding: bool,
    block_size: u32,
114
    prefix_caching: bool,
115
    window_size: Option<u32>,
Nicolas Patry's avatar
Nicolas Patry committed
116
    speculate: u32,
117
    max_batch_total_tokens: u32,
OlivierDehaene's avatar
OlivierDehaene committed
118
    mut receiver: mpsc::UnboundedReceiver<QueueCommand>,
119
) {
120
121
122
    let mut state = State::new(
        requires_padding,
        block_size,
123
        prefix_caching,
124
125
126
127
        window_size,
        speculate,
        max_batch_total_tokens,
    );
128

OlivierDehaene's avatar
OlivierDehaene committed
129
    while let Some(cmd) = receiver.recv().await {
130
        match cmd {
131
            QueueCommand::Append(entry, span) => {
132
                span.in_scope(|| state.append(*entry));
133
                metrics::gauge!("tgi_queue_size").increment(1.0);
134
            }
135
136
            QueueCommand::NextBatch {
                min_size,
137
                max_size,
138
                prefill_token_budget,
139
                token_budget,
140
                response_sender,
141
                span,
142
143
144
145
146
            } => {
                let next_batch = state
                    .next_batch(min_size, max_size, prefill_token_budget, token_budget)
                    .instrument(span)
                    .await;
147
                response_sender.send(next_batch).unwrap();
148
                metrics::gauge!("tgi_queue_size").set(state.entries.len() as f64);
149
            }
150
151
152
153
154
155
156
157
        }
    }
}

/// Queue State
#[derive(Debug)]
struct State {
    /// Queue entries organized in a Vec
158
    entries: VecDeque<(u64, Entry)>,
159
160
161
162
163
164

    /// Id of the next entry
    next_id: u64,

    /// Id of the next batch
    next_batch_id: u64,
165

166
167
    /// Paged Attention block size
    block_size: u32,
168
169
170

    /// Sliding window
    window_size: Option<u32>,
Nicolas Patry's avatar
Nicolas Patry committed
171
172
173

    /// Speculation amount
    speculate: u32,
174
175
176

    /// Paged Attention Block Allocation
    block_allocator: Option<BlockAllocator>,
177
178
179
}

impl State {
Nicolas Patry's avatar
Nicolas Patry committed
180
181
182
    fn new(
        requires_padding: bool,
        block_size: u32,
183
        prefix_caching: bool,
Nicolas Patry's avatar
Nicolas Patry committed
184
185
        window_size: Option<u32>,
        speculate: u32,
186
        max_batch_total_tokens: u32,
Nicolas Patry's avatar
Nicolas Patry committed
187
    ) -> Self {
188
189
190
191
192
193
194
195
        let block_allocator = (!requires_padding).then(|| {
            BlockAllocator::new(
                max_batch_total_tokens,
                block_size,
                prefix_caching,
                window_size,
            )
        });
196

197
        Self {
198
            entries: VecDeque::with_capacity(128),
199
200
            next_id: 0,
            next_batch_id: 0,
201
            block_size,
202
            window_size,
Nicolas Patry's avatar
Nicolas Patry committed
203
            speculate,
204
            block_allocator,
205
206
207
208
        }
    }

    /// Append an entry to the queue
209
210
211
212
213
214
    fn append(&mut self, mut entry: Entry) {
        // Create a span that will live as long as the entry is in the queue waiting to be batched
        let queue_span = info_span!(parent: &entry.span, "queued");
        entry.temp_span = Some(queue_span);

        // Push entry in the queue
215
        self.entries.push_back((self.next_id, entry));
216
217
218
219
        self.next_id += 1;
    }

    // Get the next batch
220
    async fn next_batch(
221
222
        &mut self,
        min_size: Option<usize>,
223
        max_size: Option<usize>,
224
225
226
        prefill_token_budget: u32,
        token_budget: u32,
    ) -> Option<NextBatch> {
227
        if self.entries.is_empty() {
228
            tracing::debug!("No queue");
229
230
231
232
233
234
            return None;
        }

        // Check if we have enough entries
        if let Some(min_size) = min_size {
            if self.entries.len() < min_size {
235
                tracing::debug!("Not enough entries");
236
237
238
239
                return None;
            }
        }

drbh's avatar
drbh committed
240
241
242
243
244
245
246
        if let Some(max_size) = max_size {
            if max_size == 0 {
                tracing::debug!("No capacity");
                return None;
            }
        }

247
248
249
250
        // Pad prefill_token_budget to be a multiple of block size
        let prefill_token_budget =
            ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size;

251
        // Create span for this batch to add context to inference calls
252
        let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty);
253
254
        next_batch_span.follows_from(&Span::current());

255
        let mut batch_requests = Vec::with_capacity(self.entries.len());
256
        let mut batch_entries =
257
            IntMap::with_capacity_and_hasher(self.entries.len(), BuildNoHashHasher::default());
258

259
260
261
        let mut max_input_length = 0;
        let mut prefill_tokens: u32 = 0;
        let mut decode_tokens: u32 = 0;
262
        let mut max_blocks = 0;
263
264

        // Pop entries starting from the front of the queue
265
        'entry_loop: while let Some((id, mut entry)) = self.entries.pop_front() {
266
267
            // Filter entries where the response receiver was dropped (== entries where the request
            // was dropped by the client)
OlivierDehaene's avatar
OlivierDehaene committed
268
            if entry.response_tx.is_closed() {
269
                metrics::counter!("tgi_request_failure", "err" => "dropped").increment(1);
270
                tracing::debug!("Dropping entry");
271
272
273
                continue;
            }

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
            let block_allocation = match &self.block_allocator {
                None => {
                    // We pad to max input length in the Python shards
                    // We need to take these padding tokens into the equation
                    max_input_length = max_input_length.max(entry.request.input_length);
                    prefill_tokens = (batch_requests.len() + 1) as u32 * max_input_length;

                    decode_tokens += entry.request.stopping_parameters.max_new_tokens;
                    let total_tokens = prefill_tokens + decode_tokens + self.speculate;

                    if prefill_tokens > prefill_token_budget || total_tokens > token_budget {
                        // Entry is over budget
                        // Add it back to the front
                        tracing::debug!("Over budget: prefill_tokens={prefill_tokens} > {prefill_token_budget} || {prefill_tokens} + {decode_tokens} + {} > {token_budget}", self.speculate);
                        self.entries.push_front((id, entry));
                        break 'entry_loop;
                    }
                    None
                }
                Some(block_allocator) => {
                    prefill_tokens += entry.request.input_length;
                    let max_new_tokens = match self.window_size {
                        None => entry.request.stopping_parameters.max_new_tokens,
                        Some(window_size) => min(
                            window_size.saturating_sub(entry.request.input_length),
                            entry.request.stopping_parameters.max_new_tokens,
                        ),
                    };
                    decode_tokens += max_new_tokens;

                    if prefill_tokens > prefill_token_budget
                        || (prefill_tokens + decode_tokens + self.speculate) > token_budget
                    {
                        // Entry is over budget
                        // Add it back to the front
                        tracing::debug!("Over budget: prefill_tokens={prefill_tokens} > {prefill_token_budget} || {prefill_tokens} + {decode_tokens} + {} > {token_budget}", self.speculate);
                        self.entries.push_front((id, entry));
                        break;
                    }

                    let tokens = entry.request.input_length
                        + entry.request.stopping_parameters.max_new_tokens
                        + self.speculate
                        - 1;

319
320
321
322
                    match block_allocator
                        .allocate(tokens, entry.request.input_ids.clone())
                        .await
                    {
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
                        None => {
                            // Entry is over budget
                            // Add it back to the front
                            tracing::debug!("Over budget: not enough free blocks");
                            self.entries.push_front((id, entry));
                            break 'entry_loop;
                        }
                        Some(block_allocation) => {
                            tracing::debug!("Allocation: {block_allocation:?}");
                            max_blocks = max(max_blocks, block_allocation.blocks.len() as u32);
                            Some(block_allocation)
                        }
                    }
                }
            };
338

339
            tracing::debug!("Accepting entry");
340
341
342
343
344
345
346
347
            // Create a new span to link the batch back to this entry
            let entry_batch_span = info_span!(parent: &entry.span, "infer");
            // Add relationships
            next_batch_span.follows_from(&entry_batch_span);
            entry_batch_span.follows_from(&next_batch_span);
            // Update entry
            entry.temp_span = Some(entry_batch_span);

348
349
            let (blocks, slots, prefix_len) = match &block_allocation {
                None => (Vec::new(), Vec::new(), 0),
350
351
352
                Some(block_allocation) => (
                    block_allocation.blocks.clone(),
                    block_allocation.slots.clone(),
353
                    block_allocation.prefix_len,
354
355
356
357
358
                ),
            };

            entry.block_allocation = block_allocation;

359
360
            batch_requests.push(Request {
                id,
361
                prefill_logprobs: entry.request.decoder_input_details,
Nicolas Patry's avatar
Nicolas Patry committed
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
                input_chunks: Some(client::Input {
                    chunks: entry
                        .request
                        .inputs
                        .clone()
                        .into_iter()
                        .map(|c| client::InputChunk {
                            chunk: Some(match c {
                                Chunk::Text(text) => client::Chunk::Text(text),
                                Chunk::Image(image) => client::Chunk::Image(client::Image {
                                    data: image.data,
                                    mimetype: image.mimetype,
                                }),
                            }),
                        })
                        .collect(),
378
                }),
379
                inputs: entry.request.inputs.chunks_to_string(),
380
                truncate: entry.request.truncate,
OlivierDehaene's avatar
OlivierDehaene committed
381
382
383
384
385
386
                parameters: Some(NextTokenChooserParameters::from(
                    entry.request.parameters.clone(),
                )),
                stopping_parameters: Some(StoppingCriteriaParameters::from(
                    entry.request.stopping_parameters.clone(),
                )),
Nicolas Patry's avatar
Nicolas Patry committed
387
                top_n_tokens: entry.request.top_n_tokens,
388
389
                blocks,
                slots,
390
                prefix_len,
drbh's avatar
drbh committed
391
                adapter_id: entry.request.adapter_id.clone(),
392
            });
393
394
395
396
            // Set batch_time
            entry.batch_time = Some(Instant::now());
            // Insert in batch_entries IntMap
            batch_entries.insert(id, entry);
397
398
399
400
401

            // Check if max_size
            if Some(batch_requests.len()) == max_size {
                break;
            }
402
403
        }

404
        // Empty batch
405
        if batch_requests.is_empty() {
406
            tracing::debug!("Filterered out all entries");
407
408
409
            return None;
        }

410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
        // Check if our batch is big enough
        if let Some(min_size) = min_size {
            // Batch is too small
            if batch_requests.len() < min_size {
                // Add back entries to the queue in the correct order
                for r in batch_requests.into_iter().rev() {
                    let id = r.id;
                    let entry = batch_entries.remove(&id).unwrap();
                    self.entries.push_front((id, entry));
                }

                return None;
            }
        }

        // Final batch size
426
427
        let size = batch_requests.len() as u32;
        next_batch_span.record("batch_size", size);
428
429
430
431

        let batch = Batch {
            id: self.next_batch_id,
            requests: batch_requests,
432
            size,
433
            max_tokens: (prefill_tokens + decode_tokens),
434
            max_blocks,
435
436
437
438
        };
        // Increment batch id
        self.next_batch_id += 1;

439
        metrics::histogram!("tgi_batch_next_size").record(batch.size as f64);
440

441
        Some((batch_entries, batch, next_batch_span))
442
443
444
    }
}

445
/// Result of a successful batching pass: the batched entries keyed by their id,
/// the `Batch` built from them, and the tracing span created for that batch.
type NextBatch = (IntMap<u64, Entry>, Batch, Span);
446
447
448

#[derive(Debug)]
enum QueueCommand {
449
    Append(Box<Entry>, Span),
450
451
    NextBatch {
        min_size: Option<usize>,
452
        max_size: Option<usize>,
453
        prefill_token_budget: u32,
454
        token_budget: u32,
455
        response_sender: oneshot::Sender<Option<NextBatch>>,
456
        span: Span,
457
458
459
    },
}

OlivierDehaene's avatar
OlivierDehaene committed
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
impl From<ValidParameters> for NextTokenChooserParameters {
    fn from(value: ValidParameters) -> Self {
        let (grammar, grammar_type) = match value.grammar {
            None => (String::new(), GrammarType::None),

            Some(grammar) => match grammar {
                ValidGrammar::Json(grammar_string) => (grammar_string, GrammarType::Json),
                ValidGrammar::Regex(grammar_string) => (grammar_string, GrammarType::Regex),
            },
        };

        Self {
            temperature: value.temperature,
            top_k: value.top_k,
            top_p: value.top_p,
            typical_p: value.typical_p,
            do_sample: value.do_sample,
            seed: value.seed,
            repetition_penalty: value.repetition_penalty,
            frequency_penalty: value.frequency_penalty,
            watermark: value.watermark,
            grammar,
            grammar_type: grammar_type.into(),
        }
    }
}

impl From<ValidStoppingParameters> for StoppingCriteriaParameters {
    fn from(value: ValidStoppingParameters) -> Self {
        Self {
            max_new_tokens: value.max_new_tokens,
            stop_sequences: value.stop_sequences,
            ignore_eos_token: value.ignore_eos_token,
        }
    }
}

497
498
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use tracing::info_span;

    /// Build a minimal entry (empty input, `max_new_tokens` of 1) together with the
    /// receiving half of its response channel. Keep the receiver alive for as long
    /// as the entry must count as "not dropped by the client".
    fn default_entry() -> (
        Entry,
        mpsc::UnboundedReceiver<Result<InferStreamResponse, InferError>>,
    ) {
        let (response_tx, response_rx) = mpsc::unbounded_channel();

        let entry = Entry {
            request: ValidGenerateRequest {
                inputs: vec![],
                input_ids: Some(Arc::new(vec![])),
                input_length: 0,
                truncate: 0,
                decoder_input_details: false,
                parameters: ValidParameters {
                    temperature: 0.0,
                    top_k: 0,
                    top_p: 0.0,
                    typical_p: 0.0,
                    do_sample: false,
                    seed: 0,
                    repetition_penalty: 0.0,
                    frequency_penalty: 0.0,
                    watermark: false,
                    grammar: None,
                },
                stopping_parameters: ValidStoppingParameters {
                    ignore_eos_token: false,
                    max_new_tokens: 1,
                    stop_sequences: vec![],
                },
                top_n_tokens: 0,
                adapter_id: None,
            },
            response_tx,
            span: info_span!("entry"),
            temp_span: None,
            queue_time: Instant::now(),
            batch_time: None,
            block_allocation: None,
        };
        (entry, response_rx)
    }

    #[tokio::test]
    async fn test_append() {
        let mut state = State::new(false, 1, false, None, 0, 16);
        let (entry, _guard) = default_entry();

        assert_eq!(state.next_id, 0);
        assert_eq!(state.entries.len(), 0);

        state.append(entry);

        assert_eq!(state.next_id, 1);
        assert_eq!(state.entries.len(), 1);
        let (id, _) = state.entries.remove(0).unwrap();
        assert_eq!(id, 0);
    }

    #[tokio::test]
    async fn test_next_batch_empty() {
        let mut state = State::new(false, 1, false, None, 0, 16);

        assert!(state.next_batch(None, None, 1, 1).await.is_none());
        assert!(state.next_batch(Some(1), None, 1, 1).await.is_none());
    }

    #[tokio::test]
    async fn test_next_batch_min_size() {
        let mut state = State::new(false, 1, false, None, 0, 16);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        state.append(entry1);
        state.append(entry2);

        let (entries, batch, _) = state.next_batch(None, None, 2, 2).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&0));
        assert!(entries.contains_key(&1));
        assert!(entries.get(&0).unwrap().batch_time.is_some());
        assert!(entries.get(&1).unwrap().batch_time.is_some());
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 2);

        assert_eq!(state.next_id, 2);
        assert_eq!(state.entries.len(), 0);
        assert_eq!(state.next_batch_id, 1);

        let (entry3, _guard3) = default_entry();
        state.append(entry3);

        // A single queued entry cannot satisfy a minimum size of 2
        assert!(state.next_batch(Some(2), None, 2, 2).await.is_none());

        assert_eq!(state.next_id, 3);
        assert_eq!(state.entries.len(), 1);
        let (id, _) = state.entries.remove(0).unwrap();
        assert_eq!(id, 2);
    }

    #[tokio::test]
    async fn test_next_batch_max_size() {
        let mut state = State::new(false, 1, false, None, 0, 16);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        state.append(entry1);
        state.append(entry2);

        let (entries, batch, _) = state.next_batch(None, Some(1), 2, 2).await.unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries.contains_key(&0));
        assert!(entries.get(&0).unwrap().batch_time.is_some());
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 1);

        assert_eq!(state.next_id, 2);
        assert_eq!(state.entries.len(), 1);
        assert_eq!(state.next_batch_id, 1);
    }

    #[tokio::test]
    async fn test_next_batch_token_budget() {
        let mut state = State::new(false, 1, false, None, 0, 2);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        state.append(entry1);
        state.append(entry2);

        // A budget of 1 only fits the first entry
        let (entries, batch, _) = state.next_batch(None, None, 1, 1).await.unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries.contains_key(&0));
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 1);

        assert_eq!(state.next_id, 2);
        assert_eq!(state.entries.len(), 1);
        assert_eq!(state.next_batch_id, 1);

        let (entry3, _guard3) = default_entry();
        state.append(entry3);

        let (entries, batch, _) = state.next_batch(None, None, 3, 3).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&1));
        assert!(entries.contains_key(&2));
        assert_eq!(batch.id, 1);
        assert_eq!(batch.size, 2);

        assert_eq!(state.next_id, 3);
        assert_eq!(state.entries.len(), 0);
        assert_eq!(state.next_batch_id, 2);
    }

    #[tokio::test]
    async fn test_queue_append() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        let (entry, _guard) = default_entry();
        queue.append(entry);
    }

    #[tokio::test]
    async fn test_queue_next_batch_empty() {
        let queue = Queue::new(false, 1, false, None, 0, 16);

        assert!(queue.next_batch(None, None, 1, 1).await.is_none());
        assert!(queue.next_batch(Some(1), None, 1, 1).await.is_none());
    }

    #[tokio::test]
    async fn test_queue_next_batch_min_size() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        queue.append(entry1);
        queue.append(entry2);

        let (entries, batch, _) = queue.next_batch(None, None, 2, 2).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&0));
        assert!(entries.contains_key(&1));
        assert!(entries.get(&0).unwrap().batch_time.is_some());
        assert!(entries.get(&1).unwrap().batch_time.is_some());
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 2);

        let (entry3, _guard3) = default_entry();
        queue.append(entry3);

        // Not enough requests pending
        assert!(queue.next_batch(Some(2), None, 2, 2).await.is_none());
        // Not enough token budget
        assert!(queue.next_batch(Some(1), None, 0, 0).await.is_none());
        // Ok
        let (entries2, batch2, _) = queue.next_batch(Some(1), None, 2, 2).await.unwrap();
        assert_eq!(entries2.len(), 1);
        assert!(entries2.contains_key(&2));
        assert!(entries2.get(&2).unwrap().batch_time.is_some());
        assert_eq!(batch2.id, 1);
        assert_eq!(batch2.size, 1);
    }

    #[tokio::test]
    async fn test_queue_next_batch_max_size() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        queue.append(entry1);
        queue.append(entry2);

        let (entries, batch, _) = queue.next_batch(None, Some(1), 2, 2).await.unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries.contains_key(&0));
        assert!(entries.get(&0).unwrap().batch_time.is_some());
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 1);
    }

    #[tokio::test]
    async fn test_queue_next_batch_token_budget() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        queue.append(entry1);
        queue.append(entry2);

        let (entries, batch, _) = queue.next_batch(None, None, 1, 1).await.unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries.contains_key(&0));
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 1);

        let (entry3, _guard3) = default_entry();
        queue.append(entry3);

        let (entries, batch, _) = queue.next_batch(None, None, 3, 3).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&1));
        assert!(entries.contains_key(&2));
        assert_eq!(batch.id, 1);
        assert_eq!(batch.size, 2);
    }

    #[tokio::test]
    async fn test_queue_next_batch_token_speculate() {
        let queue = Queue::new(false, 1, false, None, 2, 16);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        queue.append(entry1);
        queue.append(entry2);

        // Budget of 1 is not enough
        assert!(queue.next_batch(None, None, 1, 1).await.is_none());

        let (entries, batch, _) = queue.next_batch(None, None, 6, 6).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&0));
        assert!(entries.contains_key(&1));
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 2);
    }

    #[tokio::test]
    async fn test_queue_next_batch_dropped_receiver() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        // The receiver is dropped immediately, so the entry counts as abandoned
        let (entry, _) = default_entry();
        queue.append(entry);

        assert!(queue.next_batch(None, None, 1, 1).await.is_none());
    }
}