mod app;
mod event;
mod generation;
mod table;
mod utils;

use crate::app::App;
use crate::event::Event;
use ratatui::crossterm::ExecutableCommand;
use ratatui::backend::CrosstermBackend;
use ratatui::Terminal;
use std::io;
use text_generation_client::v3::{GrammarType, NextTokenChooserParameters, ShardedClient};
use tokenizers::Tokenizer;
use tokio::sync::{broadcast, mpsc};

/// Run benchmarking app
#[allow(clippy::too_many_arguments)]
pub async fn run(
    tokenizer_name: String,
    tokenizer: Tokenizer,
    batch_size: Vec<u32>,
    sequence_length: u32,
    decode_length: u32,
Nicolas Patry's avatar
Nicolas Patry committed
25
    top_n_tokens: Option<u32>,
26
27
    n_runs: usize,
    warmups: usize,
28
29
30
31
32
    temperature: Option<f32>,
    top_k: Option<u32>,
    top_p: Option<f32>,
    typical_p: Option<f32>,
    repetition_penalty: Option<f32>,
33
    frequency_penalty: Option<f32>,
34
35
    watermark: bool,
    do_sample: bool,
36
    client: ShardedClient,
Nicolas Patry's avatar
Nicolas Patry committed
37
) -> Result<(), std::io::Error> {
38
39
40
41
42
43
44
45
    let parameters = NextTokenChooserParameters {
        temperature: temperature.unwrap_or(1.0),
        top_k: top_k.unwrap_or(0),
        top_p: top_p.unwrap_or(1.0),
        typical_p: typical_p.unwrap_or(1.0),
        do_sample,
        seed: 0,
        repetition_penalty: repetition_penalty.unwrap_or(1.0),
46
        frequency_penalty: frequency_penalty.unwrap_or(0.0),
47
        watermark,
drbh's avatar
drbh committed
48
49
        grammar: String::new(),
        grammar_type: GrammarType::None as i32,
50
51
    };

52
    // Initialize terminal properties
53
54
55
    ratatui::crossterm::terminal::enable_raw_mode()?;
    io::stdout().execute(ratatui::crossterm::terminal::EnterAlternateScreen)?;
    io::stdout().execute(ratatui::crossterm::cursor::Hide)?;
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77

    // Initialize terminal
    let mut terminal = {
        let backend = CrosstermBackend::new(io::stdout());
        Terminal::new(backend)?
    };

    // Create message channel between generation_task and app
    let (run_sender, run_receiver) = mpsc::channel(8);
    // Crossterm event channel
    let (event_sender, mut event_receiver) = mpsc::channel(8);
    // Shutdown channel to terminate tasks
    let (shutdown_sender, _) = broadcast::channel(1);
    // Channel to check if tasks terminated
    let (shutdown_guard_sender, mut shutdown_guard_receiver) = mpsc::channel(1);

    // Create generation task
    tokio::spawn(generation::generation_task(
        tokenizer,
        batch_size.clone(),
        sequence_length,
        decode_length,
Nicolas Patry's avatar
Nicolas Patry committed
78
        top_n_tokens,
79
80
        n_runs,
        warmups,
81
        parameters,
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
        client,
        run_sender,
        shutdown_sender.subscribe(),
        shutdown_guard_sender.clone(),
    ));

    // Create event task
    tokio::spawn(event::terminal_event_task(
        250,
        event_sender,
        shutdown_sender.subscribe(),
        shutdown_guard_sender.clone(),
    ));

    // Drop our end of shutdown sender
    drop(shutdown_guard_sender);

    // Create App
    let mut app = App::new(
        run_receiver,
102
        tokenizer_name.clone(),
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
        sequence_length,
        decode_length,
        n_runs,
        batch_size,
    );

    while app.running {
        // Draw frame
        terminal.draw(|frame| app.render(frame))?;

        // Await a new event from event handling task
        match event_receiver.recv().await {
            None => break,
            // Update app state
            Some(event) => match event {
                Event::Tick => app.tick(),
                Event::Key(key_event) => app.handle_key_event(key_event),
                _ => {}
            },
        }
    }

    // Ask tasks to shutdown
    let _ = shutdown_sender.send(());
    // Wait for tasks to shutdown
    let _ = shutdown_guard_receiver.recv().await;

    // Revert terminal to original view
131
132
133
    io::stdout().execute(ratatui::crossterm::terminal::LeaveAlternateScreen)?;
    ratatui::crossterm::terminal::disable_raw_mode()?;
    io::stdout().execute(ratatui::crossterm::cursor::Show)?;
134

135
136
137
138
    let parameters_table = table::parameters_table(
        tokenizer_name,
        sequence_length,
        decode_length,
Nicolas Patry's avatar
Nicolas Patry committed
139
        top_n_tokens,
140
141
142
143
144
145
146
        n_runs,
        warmups,
        temperature,
        top_k,
        top_p,
        typical_p,
        repetition_penalty,
147
        frequency_penalty,
148
149
150
151
152
153
154
155
156
157
158
        watermark,
        do_sample,
    );
    println!("\n{parameters_table}\n");

    let latency_table = table::latency_table(&app.data);
    println!("\n{latency_table}\n");

    let throughput_table = table::throughput_table(&app.data);
    println!("\n{throughput_table}\n");

159
160
    Ok(())
}