queue.rs 27.2 KB
Newer Older
Nicolas Patry's avatar
Nicolas Patry committed
1
2
3
4
use crate::block_allocator::{BlockAllocation, BlockAllocator};
use crate::client;
use crate::client::{
    Batch, GrammarType, NextTokenChooserParameters, Request, StoppingCriteriaParameters,
OlivierDehaene's avatar
OlivierDehaene committed
5
};
6
use nohash_hasher::{BuildNoHashHasher, IntMap};
7
use std::cmp::{max, min};
8
use std::collections::VecDeque;
Nicolas Patry's avatar
Nicolas Patry committed
9
10
11
12
13
use text_generation_router::infer::InferError;
use text_generation_router::infer::InferStreamResponse;
use text_generation_router::validation::{
    Chunk, ChunksToString, ValidGenerateRequest, ValidGrammar, ValidParameters,
    ValidStoppingParameters,
OlivierDehaene's avatar
OlivierDehaene committed
14
};
OlivierDehaene's avatar
OlivierDehaene committed
15
use tokio::sync::{mpsc, oneshot};
16
use tokio::time::Instant;
17
use tracing::{info_span, instrument, Instrument, Span};
18
19
20
21
22
23
24

/// Queue entry
#[derive(Debug)]
pub(crate) struct Entry {
    /// Request
    pub request: ValidGenerateRequest,
    /// Response sender to communicate between the Infer struct and the batching_task
OlivierDehaene's avatar
OlivierDehaene committed
25
    pub response_tx: mpsc::UnboundedSender<Result<InferStreamResponse, InferError>>,
26
27
28
29
30
31
    /// Span that will live as long as entry
    pub span: Span,
    /// Temporary span used as a guard when logging inference, wait times...
    pub temp_span: Option<Span>,
    /// Instant when this entry was queued
    pub queue_time: Instant,
32
33
    /// Instant when this entry was added to a batch
    pub batch_time: Option<Instant>,
34
35
    /// Block Allocation
    pub block_allocation: Option<BlockAllocation>,
36
37
38
39
40
41
}

/// Request Queue
#[derive(Debug, Clone)]
pub(crate) struct Queue {
    /// Channel to communicate with the background queue task
OlivierDehaene's avatar
OlivierDehaene committed
42
    queue_sender: mpsc::UnboundedSender<QueueCommand>,
43
44
45
}

impl Queue {
Nicolas Patry's avatar
Nicolas Patry committed
46
47
48
    pub(crate) fn new(
        requires_padding: bool,
        block_size: u32,
49
        prefix_caching: bool,
Nicolas Patry's avatar
Nicolas Patry committed
50
51
        window_size: Option<u32>,
        speculate: u32,
52
        max_batch_total_tokens: u32,
Nicolas Patry's avatar
Nicolas Patry committed
53
    ) -> Self {
54
        // Create channel
OlivierDehaene's avatar
OlivierDehaene committed
55
        let (queue_sender, queue_receiver) = mpsc::unbounded_channel();
56
57

        // Launch background queue task
58
59
60
        tokio::spawn(queue_task(
            requires_padding,
            block_size,
61
            prefix_caching,
62
            window_size,
Nicolas Patry's avatar
Nicolas Patry committed
63
            speculate,
64
            max_batch_total_tokens,
65
66
            queue_receiver,
        ));
67
68
69
70

        Self { queue_sender }
    }

71
    /// Append an entry to the queue
72
    #[instrument(skip_all)]
73
74
75
    pub(crate) fn append(&self, entry: Entry) {
        // Send append command to the background task managing the state
        // Unwrap is safe here
76
        self.queue_sender
77
            .send(QueueCommand::Append(Box::new(entry), Span::current()))
78
            .unwrap();
79
80
81
    }

    // Get the next batch
82
    #[instrument(skip(self))]
83
84
85
    pub(crate) async fn next_batch(
        &self,
        min_size: Option<usize>,
86
        max_size: Option<usize>,
87
        prefill_token_budget: u32,
88
        token_budget: u32,
89
90
91
92
93
94
95
96
    ) -> Option<NextBatch> {
        // Create response channel
        let (response_sender, response_receiver) = oneshot::channel();
        // Send next batch command to the background task managing the state
        // Unwrap is safe here
        self.queue_sender
            .send(QueueCommand::NextBatch {
                min_size,
97
                max_size,
98
                prefill_token_budget,
99
                token_budget,
100
                response_sender,
101
                span: Span::current(),
102
103
104
105
106
107
108
109
110
            })
            .unwrap();
        // Await on response channel
        // Unwrap is safe here
        response_receiver.await.unwrap()
    }
}

// Background task responsible of the queue state
111
112
113
async fn queue_task(
    requires_padding: bool,
    block_size: u32,
114
    prefix_caching: bool,
115
    window_size: Option<u32>,
Nicolas Patry's avatar
Nicolas Patry committed
116
    speculate: u32,
117
    max_batch_total_tokens: u32,
OlivierDehaene's avatar
OlivierDehaene committed
118
    mut receiver: mpsc::UnboundedReceiver<QueueCommand>,
119
) {
120
121
122
    let mut state = State::new(
        requires_padding,
        block_size,
123
        prefix_caching,
124
125
126
127
        window_size,
        speculate,
        max_batch_total_tokens,
    );
128

OlivierDehaene's avatar
OlivierDehaene committed
129
    while let Some(cmd) = receiver.recv().await {
130
        match cmd {
131
            QueueCommand::Append(entry, span) => {
132
                span.in_scope(|| state.append(*entry));
133
                metrics::gauge!("tgi_queue_size").increment(1.0);
134
            }
135
136
            QueueCommand::NextBatch {
                min_size,
137
                max_size,
138
                prefill_token_budget,
139
                token_budget,
140
                response_sender,
141
                span,
142
143
144
145
146
            } => {
                let next_batch = state
                    .next_batch(min_size, max_size, prefill_token_budget, token_budget)
                    .instrument(span)
                    .await;
147
                response_sender.send(next_batch).unwrap();
148
                metrics::gauge!("tgi_queue_size").set(state.entries.len() as f64);
149
            }
150
151
152
153
154
155
156
157
        }
    }
}

/// Queue State
#[derive(Debug)]
struct State {
    /// Queue entries organized in a Vec
158
    entries: VecDeque<(u64, Entry)>,
159
160
161
162
163
164

    /// Id of the next entry
    next_id: u64,

    /// Id of the next batch
    next_batch_id: u64,
165

166
167
    /// Paged Attention block size
    block_size: u32,
168
169
170

    /// Sliding window
    window_size: Option<u32>,
Nicolas Patry's avatar
Nicolas Patry committed
171
172
173

    /// Speculation amount
    speculate: u32,
174
175
176

    /// Paged Attention Block Allocation
    block_allocator: Option<BlockAllocator>,
177
178
179
}

impl State {
Nicolas Patry's avatar
Nicolas Patry committed
180
181
182
    fn new(
        requires_padding: bool,
        block_size: u32,
183
        prefix_caching: bool,
Nicolas Patry's avatar
Nicolas Patry committed
184
185
        window_size: Option<u32>,
        speculate: u32,
186
        max_batch_total_tokens: u32,
Nicolas Patry's avatar
Nicolas Patry committed
187
    ) -> Self {
188
189
190
191
192
193
194
195
        let block_allocator = (!requires_padding).then(|| {
            BlockAllocator::new(
                max_batch_total_tokens,
                block_size,
                prefix_caching,
                window_size,
            )
        });
196

197
        Self {
198
            entries: VecDeque::with_capacity(128),
199
200
            next_id: 0,
            next_batch_id: 0,
201
            block_size,
202
            window_size,
Nicolas Patry's avatar
Nicolas Patry committed
203
            speculate,
204
            block_allocator,
205
206
207
208
        }
    }

    /// Append an entry to the queue
209
210
211
212
213
214
    fn append(&mut self, mut entry: Entry) {
        // Create a span that will live as long as the entry is in the queue waiting to be batched
        let queue_span = info_span!(parent: &entry.span, "queued");
        entry.temp_span = Some(queue_span);

        // Push entry in the queue
215
        self.entries.push_back((self.next_id, entry));
216
217
218
219
        self.next_id += 1;
    }

    // Get the next batch
220
    async fn next_batch(
221
222
        &mut self,
        min_size: Option<usize>,
223
        max_size: Option<usize>,
224
225
226
        prefill_token_budget: u32,
        token_budget: u32,
    ) -> Option<NextBatch> {
227
        if self.entries.is_empty() {
228
            tracing::debug!("No queue");
229
230
231
232
233
234
            return None;
        }

        // Check if we have enough entries
        if let Some(min_size) = min_size {
            if self.entries.len() < min_size {
235
                tracing::debug!("Not enough entries");
236
237
238
239
                return None;
            }
        }

drbh's avatar
drbh committed
240
241
242
243
244
245
246
        if let Some(max_size) = max_size {
            if max_size == 0 {
                tracing::debug!("No capacity");
                return None;
            }
        }

247
248
249
250
        // Pad prefill_token_budget to be a multiple of block size
        let prefill_token_budget =
            ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size;

251
        // Create span for this batch to add context to inference calls
252
        let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty);
253
        next_batch_span.follows_from(Span::current());
254

255
        let mut batch = Vec::with_capacity(self.entries.len());
256
257
258
        let mut max_input_length = 0;
        let mut prefill_tokens: u32 = 0;
        let mut decode_tokens: u32 = 0;
259
        let mut max_blocks = 0;
260
261

        // Pop entries starting from the front of the queue
262
        'entry_loop: while let Some((id, entry)) = self.entries.pop_front() {
263
264
            // Filter entries where the response receiver was dropped (== entries where the request
            // was dropped by the client)
OlivierDehaene's avatar
OlivierDehaene committed
265
            if entry.response_tx.is_closed() {
266
                metrics::counter!("tgi_request_failure", "err" => "dropped").increment(1);
267
                tracing::debug!("Dropping entry");
268
269
270
                continue;
            }

271
272
273
274
275
            let block_allocation = match &self.block_allocator {
                None => {
                    // We pad to max input length in the Python shards
                    // We need to take these padding tokens into the equation
                    max_input_length = max_input_length.max(entry.request.input_length);
276
                    prefill_tokens = (batch.len() + 1) as u32 * max_input_length;
277
278
279
280
281
282
283
284
285
286
287
288
289

                    decode_tokens += entry.request.stopping_parameters.max_new_tokens;
                    let total_tokens = prefill_tokens + decode_tokens + self.speculate;

                    if prefill_tokens > prefill_token_budget || total_tokens > token_budget {
                        // Entry is over budget
                        // Add it back to the front
                        tracing::debug!("Over budget: prefill_tokens={prefill_tokens} > {prefill_token_budget} || {prefill_tokens} + {decode_tokens} + {} > {token_budget}", self.speculate);
                        self.entries.push_front((id, entry));
                        break 'entry_loop;
                    }
                    None
                }
290
                Some(_block_allocator) => {
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
                    prefill_tokens += entry.request.input_length;
                    let max_new_tokens = match self.window_size {
                        None => entry.request.stopping_parameters.max_new_tokens,
                        Some(window_size) => min(
                            window_size.saturating_sub(entry.request.input_length),
                            entry.request.stopping_parameters.max_new_tokens,
                        ),
                    };
                    decode_tokens += max_new_tokens;

                    if prefill_tokens > prefill_token_budget
                        || (prefill_tokens + decode_tokens + self.speculate) > token_budget
                    {
                        // Entry is over budget
                        // Add it back to the front
                        tracing::debug!("Over budget: prefill_tokens={prefill_tokens} > {prefill_token_budget} || {prefill_tokens} + {decode_tokens} + {} > {token_budget}", self.speculate);
                        self.entries.push_front((id, entry));
                        break;
                    }

                    let tokens = entry.request.input_length
                        + entry.request.stopping_parameters.max_new_tokens
                        + self.speculate
                        - 1;

Nicolas Patry's avatar
Nicolas Patry committed
316
317
318
319
320
321
322
323
                    // If users wants the prefill logprobs, we cannot reuse the cache.
                    // So no input_ids for the radix tree.
                    let input_ids = if entry.request.decoder_input_details {
                        None
                    } else {
                        entry.request.input_ids.clone()
                    };

324
                    Some((tokens, input_ids))
325
326
                }
            };
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
            batch.push((id, entry, block_allocation));
            if Some(batch.len()) == max_size {
                break;
            }
        }

        // Empty batch
        if batch.is_empty() {
            tracing::debug!("Filterered out all entries");
            return None;
        }

        // XXX We haven't allocated yet, so we're allowed to ditch the results.
        // Check if our batch is big enough
        if let Some(min_size) = min_size {
            // Batch is too small
            if batch.len() < min_size {
                // Add back entries to the queue in the correct order
                for (id, entry, _) in batch.into_iter().rev() {
                    self.entries.push_front((id, entry));
                }
                return None;
            }
        }

        let mut batch_requests = Vec::with_capacity(self.entries.len());
        let mut batch_entries =
            IntMap::with_capacity_and_hasher(self.entries.len(), BuildNoHashHasher::default());
355

356
357
358
359
        for (id, mut entry, block_allocation) in batch {
            let block_allocation = if let (Some((tokens, input_ids)), Some(block_allocator)) =
                (block_allocation, &self.block_allocator)
            {
Nicolas Patry's avatar
Nicolas Patry committed
360
                tracing::debug!("Allocating {tokens} with {input_ids:?}");
361
362
363
364
365
366
                match block_allocator.allocate(tokens, input_ids).await {
                    None => {
                        // Entry is over budget
                        // Add it back to the front
                        tracing::debug!("Over budget: not enough free blocks");
                        self.entries.push_front((id, entry));
Nicolas Patry's avatar
Nicolas Patry committed
367
                        continue;
368
369
370
371
372
373
374
375
376
377
                    }
                    Some(block_allocation) => {
                        tracing::debug!("Allocation: {block_allocation:?}");
                        max_blocks = max(max_blocks, block_allocation.blocks.len() as u32);
                        Some(block_allocation)
                    }
                }
            } else {
                None
            };
378
            tracing::debug!("Accepting entry");
379
380
381
382
383
384
385
386
            // Create a new span to link the batch back to this entry
            let entry_batch_span = info_span!(parent: &entry.span, "infer");
            // Add relationships
            next_batch_span.follows_from(&entry_batch_span);
            entry_batch_span.follows_from(&next_batch_span);
            // Update entry
            entry.temp_span = Some(entry_batch_span);

387
388
            let (blocks, slots, prefix_len) = match &block_allocation {
                None => (Vec::new(), Vec::new(), 0),
389
390
391
                Some(block_allocation) => (
                    block_allocation.blocks.clone(),
                    block_allocation.slots.clone(),
392
                    block_allocation.prefix_len,
393
394
395
396
397
                ),
            };

            entry.block_allocation = block_allocation;

398
399
            batch_requests.push(Request {
                id,
400
                prefill_logprobs: entry.request.decoder_input_details,
Nicolas Patry's avatar
Nicolas Patry committed
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
                input_chunks: Some(client::Input {
                    chunks: entry
                        .request
                        .inputs
                        .clone()
                        .into_iter()
                        .map(|c| client::InputChunk {
                            chunk: Some(match c {
                                Chunk::Text(text) => client::Chunk::Text(text),
                                Chunk::Image(image) => client::Chunk::Image(client::Image {
                                    data: image.data,
                                    mimetype: image.mimetype,
                                }),
                            }),
                        })
                        .collect(),
417
                }),
418
                inputs: entry.request.inputs.chunks_to_string(),
419
                truncate: entry.request.truncate,
420
                add_special_tokens: entry.request.add_special_tokens,
OlivierDehaene's avatar
OlivierDehaene committed
421
422
423
424
425
426
                parameters: Some(NextTokenChooserParameters::from(
                    entry.request.parameters.clone(),
                )),
                stopping_parameters: Some(StoppingCriteriaParameters::from(
                    entry.request.stopping_parameters.clone(),
                )),
Nicolas Patry's avatar
Nicolas Patry committed
427
                top_n_tokens: entry.request.top_n_tokens,
428
429
                blocks,
                slots,
430
                prefix_len,
drbh's avatar
drbh committed
431
                adapter_id: entry.request.adapter_id.clone(),
432
            });
433
434
435
436
            // Set batch_time
            entry.batch_time = Some(Instant::now());
            // Insert in batch_entries IntMap
            batch_entries.insert(id, entry);
437
438
        }

Nicolas Patry's avatar
Nicolas Patry committed
439
440
441
442
443
444
        // Empty batch
        if batch_requests.is_empty() {
            tracing::debug!("Filterered out all entries");
            return None;
        }

445
        // Final batch size
446
447
        let size = batch_requests.len() as u32;
        next_batch_span.record("batch_size", size);
448
449
450
451

        let batch = Batch {
            id: self.next_batch_id,
            requests: batch_requests,
452
            size,
453
            max_tokens: (prefill_tokens + decode_tokens),
454
            max_blocks,
455
456
457
458
        };
        // Increment batch id
        self.next_batch_id += 1;

459
        metrics::histogram!("tgi_batch_next_size").record(batch.size as f64);
460

461
        Some((batch_entries, batch, next_batch_span))
462
463
464
    }
}

465
/// Result of a successful batch request: the batched entries keyed by entry id, the
/// `Batch` message built from them, and the tracing span created for that batch.
type NextBatch = (IntMap<u64, Entry>, Batch, Span);
466
467
468

#[derive(Debug)]
enum QueueCommand {
469
    Append(Box<Entry>, Span),
470
471
    NextBatch {
        min_size: Option<usize>,
472
        max_size: Option<usize>,
473
        prefill_token_budget: u32,
474
        token_budget: u32,
475
        response_sender: oneshot::Sender<Option<NextBatch>>,
476
        span: Span,
477
478
479
    },
}

OlivierDehaene's avatar
OlivierDehaene committed
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
impl From<ValidParameters> for NextTokenChooserParameters {
    fn from(value: ValidParameters) -> Self {
        let (grammar, grammar_type) = match value.grammar {
            None => (String::new(), GrammarType::None),

            Some(grammar) => match grammar {
                ValidGrammar::Json(grammar_string) => (grammar_string, GrammarType::Json),
                ValidGrammar::Regex(grammar_string) => (grammar_string, GrammarType::Regex),
            },
        };

        Self {
            temperature: value.temperature,
            top_k: value.top_k,
            top_p: value.top_p,
            typical_p: value.typical_p,
            do_sample: value.do_sample,
            seed: value.seed,
            repetition_penalty: value.repetition_penalty,
            frequency_penalty: value.frequency_penalty,
            watermark: value.watermark,
            grammar,
            grammar_type: grammar_type.into(),
        }
    }
}

impl From<ValidStoppingParameters> for StoppingCriteriaParameters {
    fn from(value: ValidStoppingParameters) -> Self {
        Self {
            max_new_tokens: value.max_new_tokens,
            stop_sequences: value.stop_sequences,
            ignore_eos_token: value.ignore_eos_token,
        }
    }
}

517
518
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use tracing::info_span;

    /// Build a minimal entry (empty input, `max_new_tokens = 1`) together with the receiving
    /// half of its response channel. Dropping the receiver marks the entry as abandoned.
    fn default_entry() -> (
        Entry,
        mpsc::UnboundedReceiver<Result<InferStreamResponse, InferError>>,
    ) {
        let (response_tx, response_rx) = mpsc::unbounded_channel();

        let parameters = ValidParameters {
            temperature: 0.0,
            top_k: 0,
            top_p: 0.0,
            typical_p: 0.0,
            do_sample: false,
            seed: 0,
            repetition_penalty: 0.0,
            frequency_penalty: 0.0,
            watermark: false,
            grammar: None,
        };
        let stopping_parameters = ValidStoppingParameters {
            ignore_eos_token: false,
            max_new_tokens: 1,
            stop_sequences: vec![],
        };
        let request = ValidGenerateRequest {
            inputs: vec![],
            input_ids: Some(Arc::new(vec![])),
            input_length: 0,
            add_special_tokens: true,
            truncate: 0,
            decoder_input_details: false,
            parameters,
            stopping_parameters,
            top_n_tokens: 0,
            adapter_id: None,
        };

        let entry = Entry {
            request,
            response_tx,
            span: info_span!("entry"),
            temp_span: None,
            queue_time: Instant::now(),
            batch_time: None,
            block_allocation: None,
        };
        (entry, response_rx)
    }

    #[tokio::test]
    async fn test_append() {
        let mut state = State::new(false, 1, false, None, 0, 16);
        let (entry, _rx) = default_entry();

        assert_eq!(state.next_id, 0);
        assert_eq!(state.entries.len(), 0);

        state.append(entry);

        assert_eq!(state.next_id, 1);
        assert_eq!(state.entries.len(), 1);
        let (first_id, _) = state.entries.pop_front().unwrap();
        assert_eq!(first_id, 0);
    }

    #[tokio::test]
    async fn test_next_batch_empty() {
        let mut state = State::new(false, 1, false, None, 0, 16);

        let without_min = state.next_batch(None, None, 1, 1).await;
        assert!(without_min.is_none());
        let with_min = state.next_batch(Some(1), None, 1, 1).await;
        assert!(with_min.is_none());
    }

    #[tokio::test]
    async fn test_next_batch_min_size() {
        let mut state = State::new(false, 1, false, None, 0, 16);
        let (first, _rx1) = default_entry();
        let (second, _rx2) = default_entry();
        state.append(first);
        state.append(second);

        let (entries, batch, _) = state.next_batch(None, None, 2, 2).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&0));
        assert!(entries.contains_key(&1));
        assert!(entries[&0].batch_time.is_some());
        assert!(entries[&1].batch_time.is_some());
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 2);

        assert_eq!(state.next_id, 2);
        assert_eq!(state.entries.len(), 0);
        assert_eq!(state.next_batch_id, 1);

        let (third, _rx3) = default_entry();
        state.append(third);

        // A single pending entry cannot satisfy a minimum size of two
        assert!(state.next_batch(Some(2), None, 2, 2).await.is_none());

        assert_eq!(state.next_id, 3);
        assert_eq!(state.entries.len(), 1);
        let (remaining_id, _) = state.entries.pop_front().unwrap();
        assert_eq!(remaining_id, 2);
    }

    #[tokio::test]
    async fn test_next_batch_max_size() {
        let mut state = State::new(false, 1, false, None, 0, 16);
        let (first, _rx1) = default_entry();
        let (second, _rx2) = default_entry();
        state.append(first);
        state.append(second);

        let (entries, batch, _) = state.next_batch(None, Some(1), 2, 2).await.unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries.contains_key(&0));
        assert!(entries[&0].batch_time.is_some());
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 1);

        assert_eq!(state.next_id, 2);
        assert_eq!(state.entries.len(), 1);
        assert_eq!(state.next_batch_id, 1);
    }

    #[tokio::test]
    async fn test_next_batch_token_budget() {
        let mut state = State::new(false, 1, false, None, 0, 2);
        let (first, _rx1) = default_entry();
        let (second, _rx2) = default_entry();
        state.append(first);
        state.append(second);

        let (entries, batch, _) = state.next_batch(None, None, 1, 1).await.unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries.contains_key(&0));
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 1);

        assert_eq!(state.next_id, 2);
        assert_eq!(state.entries.len(), 1);
        assert_eq!(state.next_batch_id, 1);

        let (third, _rx3) = default_entry();
        state.append(third);

        let (entries, batch, _) = state.next_batch(None, None, 3, 3).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&1));
        assert!(entries.contains_key(&2));
        assert_eq!(batch.id, 1);
        assert_eq!(batch.size, 2);

        assert_eq!(state.next_id, 3);
        assert_eq!(state.entries.len(), 0);
        assert_eq!(state.next_batch_id, 2);
    }

    #[tokio::test]
    async fn test_queue_append() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        let (entry, _rx) = default_entry();
        queue.append(entry);
    }

    #[tokio::test]
    async fn test_queue_next_batch_empty() {
        let queue = Queue::new(false, 1, false, None, 0, 16);

        let without_min = queue.next_batch(None, None, 1, 1).await;
        assert!(without_min.is_none());
        let with_min = queue.next_batch(Some(1), None, 1, 1).await;
        assert!(with_min.is_none());
    }

    #[tokio::test]
    async fn test_queue_next_batch_min_size() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        let (first, _rx1) = default_entry();
        let (second, _rx2) = default_entry();
        queue.append(first);
        queue.append(second);

        let (entries, batch, _) = queue.next_batch(None, None, 2, 2).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&0));
        assert!(entries.contains_key(&1));
        assert!(entries[&0].batch_time.is_some());
        assert!(entries[&1].batch_time.is_some());
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 2);

        let (third, _rx3) = default_entry();
        queue.append(third);

        // Not enough requests pending
        assert!(queue.next_batch(Some(2), None, 2, 2).await.is_none());
        // Not enough token budget
        assert!(queue.next_batch(Some(1), None, 0, 0).await.is_none());
        // Ok
        let (entries2, batch2, _) = queue.next_batch(Some(1), None, 2, 2).await.unwrap();
        assert_eq!(entries2.len(), 1);
        assert!(entries2.contains_key(&2));
        assert!(entries2[&2].batch_time.is_some());
        assert_eq!(batch2.id, 1);
        assert_eq!(batch2.size, 1);
    }

    #[tokio::test]
    async fn test_queue_next_batch_max_size() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        let (first, _rx1) = default_entry();
        let (second, _rx2) = default_entry();
        queue.append(first);
        queue.append(second);

        let (entries, batch, _) = queue.next_batch(None, Some(1), 2, 2).await.unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries.contains_key(&0));
        assert!(entries[&0].batch_time.is_some());
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 1);
    }

    #[tokio::test]
    async fn test_queue_next_batch_token_budget() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        let (first, _rx1) = default_entry();
        let (second, _rx2) = default_entry();
        queue.append(first);
        queue.append(second);

        let (entries, batch, _) = queue.next_batch(None, None, 1, 1).await.unwrap();
        assert_eq!(entries.len(), 1);
        assert!(entries.contains_key(&0));
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 1);

        let (third, _rx3) = default_entry();
        queue.append(third);

        let (entries, batch, _) = queue.next_batch(None, None, 3, 3).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&1));
        assert!(entries.contains_key(&2));
        assert_eq!(batch.id, 1);
        assert_eq!(batch.size, 2);
    }

    #[tokio::test]
    async fn test_queue_next_batch_token_speculate() {
        let queue = Queue::new(false, 1, false, None, 2, 16);
        let (first, _rx1) = default_entry();
        let (second, _rx2) = default_entry();
        queue.append(first);
        queue.append(second);

        // Budget of 1 is not enough
        assert!(queue.next_batch(None, None, 1, 1).await.is_none());

        let (entries, batch, _) = queue.next_batch(None, None, 6, 6).await.unwrap();
        assert_eq!(entries.len(), 2);
        assert!(entries.contains_key(&0));
        assert!(entries.contains_key(&1));
        assert_eq!(batch.id, 0);
        assert_eq!(batch.size, 2);
    }

    #[tokio::test]
    async fn test_queue_next_batch_dropped_receiver() {
        let queue = Queue::new(false, 1, false, None, 0, 16);
        // The receiver is dropped right away, so the entry counts as abandoned
        let (entry, _) = default_entry();
        queue.append(entry);

        assert!(queue.next_batch(None, None, 1, 1).await.is_none());
    }
}