"vscode:/vscode.git/clone" did not exist on "8dbe1be6c7f96352f95c786cd39041bf8e557000"
sharded_client.rs 2.99 KB
Newer Older
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
1
//! Multi shard Client
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
2
use crate::Result;
3
use crate::{Batch, Client, Generation};
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
4
5
use futures::future::join_all;
use tonic::transport::Uri;
6
use tracing::instrument;
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
7

Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
8
/// Text Generation Inference gRPC multi client
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
9
pub struct ShardedClient {
10
    clients: Vec<Client>,
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
11
12
13
}

impl ShardedClient {
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
14
    fn new(clients: Vec<Client>) -> Self {
15
        Self { clients }
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
16
17
    }

Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
18
19
    /// Create a new ShardedClient from a master client. The master client will communicate with
    /// the other shards and returns all uris/unix sockets with the `service_discovery` gRPC method.
Olivier Dehaene's avatar
Olivier Dehaene committed
20
    async fn from_master_client(mut master_client: Client) -> Result<Self> {
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
21
        // Get all uris/unix sockets from the master client
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
22
        let uris = master_client.service_discovery().await.unwrap();
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
23
        let futures = uris.into_iter().map(Client::connect_uds);
Olivier Dehaene's avatar
Olivier Dehaene committed
24
25
        let clients: Result<Vec<Client>> = join_all(futures).await.into_iter().collect();
        Ok(Self::new(clients?))
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
26
27
    }

Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
28
    /// Returns a client connected to the given uri
Olivier Dehaene's avatar
Olivier Dehaene committed
29
30
    pub async fn connect(uri: Uri) -> Result<Self> {
        let master_client = Client::connect(uri).await?;
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
31
32
33
        Self::from_master_client(master_client).await
    }

Olivier Dehaene's avatar
Olivier Dehaene committed
34
35
36
    /// Returns a client connected to the given unix socket
    pub async fn connect_uds(path: String) -> Result<Self> {
        let master_client = Client::connect_uds(path).await?;
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
37
38
39
        Self::from_master_client(master_client).await
    }

40
    /// Clear the past generations cache
41
    #[instrument(skip(self))]
42
    pub async fn clear_cache(&mut self, batch_id: Option<u64>) -> Result<()> {
43
44
45
        let futures: Vec<_> = self
            .clients
            .iter_mut()
46
            .map(|client| client.clear_cache(batch_id))
47
48
49
50
            .collect();
        join_all(futures).await.into_iter().collect()
    }

Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
51
52
    /// Generate one token for each request in the given batch
    ///
53
    /// Returns Generation for each request in batch
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
54
    /// and the next cached batch
55
    #[instrument(skip_all, fields(id = &batch.id, size = &batch.size))]
56
    pub async fn prefill(&mut self, batch: Batch) -> Result<(Vec<Generation>, Option<Batch>)> {
57
58
59
        let futures: Vec<_> = self
            .clients
            .iter_mut()
60
            .map(|client| Box::pin(client.prefill(batch.clone())))
61
            .collect();
62
63
        // all shards return the same message
        join_all(futures).await.pop().unwrap()
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
64
65
    }

66
    /// Generate one token for each request in the given cached batches
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
67
    ///
68
    /// Returns Generation for each request in batches
Olivier Dehaene's avatar
v0.1.0  
Olivier Dehaene committed
69
    /// and the next cached batch
70
    #[instrument(skip_all, fields(size = batches.iter().map(|batch|{batch.size}).sum::<u32>()))]
71
    pub async fn decode(
72
        &mut self,
Olivier Dehaene's avatar
Olivier Dehaene committed
73
        batches: Vec<Batch>,
74
    ) -> Result<(Vec<Generation>, Option<Batch>)> {
75
76
77
        let futures: Vec<_> = self
            .clients
            .iter_mut()
78
            .map(|client| Box::pin(client.decode(batches.clone())))
79
            .collect();
80
81
        // all shards return the same message
        join_all(futures).await.pop().unwrap()
Olivier Dehaene's avatar
Olivier Dehaene committed
82
    }
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
83
}