main.rs 946 Bytes
Newer Older
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
1
use bloom_inference_client::ShardedClient;
Olivier Dehaene's avatar
Olivier Dehaene committed
2
use std::net::SocketAddr;
Olivier Dehaene's avatar
Olivier Dehaene committed
3
use text_generation_router::server;
Olivier Dehaene's avatar
Olivier Dehaene committed
4
use tokenizers::Tokenizer;
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
5

Olivier Dehaene's avatar
Olivier Dehaene committed
6
7
8
9
10
11
12
13
14
fn main() -> Result<(), std::io::Error> {
    let tokenizer = Tokenizer::from_pretrained("bigscience/bloom", None).unwrap();

    tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .unwrap()
        .block_on(async {
            tracing_subscriber::fmt::init();
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
15

Olivier Dehaene's avatar
Olivier Dehaene committed
16
17
18
            let sharded_client = ShardedClient::connect_uds("/tmp/bloom-inference-0".to_string())
                .await
                .expect("Could not connect to server");
Olivier Dehaene's avatar
Olivier Dehaene committed
19
20
21
22
23
            sharded_client
                .clear_cache()
                .await
                .expect("Unable to clear cache");
            tracing::info!("Connected");
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
24

Olivier Dehaene's avatar
Olivier Dehaene committed
25
            let addr = SocketAddr::from(([0, 0, 0, 0], 3000));
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
26

Olivier Dehaene's avatar
Olivier Dehaene committed
27
28
            server::run(sharded_client, tokenizer, addr).await;
            Ok(())
Olivier Dehaene's avatar
Olivier Dehaene committed
29
        })
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
30
}