generate.proto 2.01 KB
Newer Older
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
1
2
3
4
syntax = "proto3";

package generate.v1;

Olivier Dehaene's avatar
Olivier Dehaene committed
5
/// RPC surface for text generation: shard discovery, cache reset, and
/// token generation for fresh or previously cached batches.
service TextGenerationService {
    /// Service discovery
    rpc ServiceDiscovery (ServiceDiscoveryRequest) returns (ServiceDiscoveryResponse);
    /// Empties batch cache
    rpc ClearCache (ClearCacheRequest) returns (ClearCacheResponse);
    /// Generate tokens for a batch
    rpc Generate (GenerateRequest) returns (GenerateResponse);
    /// Generate tokens for a list of cached batches
    rpc GenerateWithCache (GenerateWithCacheRequest) returns (GenerateWithCacheResponse);
}

Olivier Dehaene's avatar
Olivier Dehaene committed
16
17
18
/// Request message of the ServiceDiscovery RPC.
/// Declares no fields.
message ServiceDiscoveryRequest {
}

Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
19
/// Response message of the ServiceDiscovery RPC.
message ServiceDiscoveryResponse {
    /// Other shards urls
    repeated string urls = 1;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
24
25
26
27
28
29
/// Request message of the ClearCache RPC.
/// Declares no fields.
message ClearCacheRequest {
}

/// Response message of the ClearCache RPC.
/// Declares no fields.
message ClearCacheResponse {
}

Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
30
31
32
33
34
35
36
37
38
39
40
41
/// Per-request parameters for the logits warper.
/// NOTE(review): the exact semantics and valid ranges of these fields are
/// defined by the consumer of this message, not by this schema; the field
/// notes below are assumptions to confirm against that implementation.
message LogitsWarperParameters {
    /// Presumably the sampling temperature (TODO confirm).
    float temperature = 1;

    /// Presumably the top-k cutoff (TODO confirm).
    uint32 top_k = 2;

    /// Presumably the top-p (nucleus) threshold (TODO confirm).
    float top_p = 3;

    /// Presumably toggles sampling vs. deterministic decoding (TODO confirm).
    bool do_sample = 4;
}

/// A single generation request.
message Request {
    /// Request ID
    uint64 id = 1;
    /// The generation context
    string inputs = 2;
    /// The number of tokens inside inputs
    uint32 input_length = 3;
    /// Logits Warper Parameters
    LogitsWarperParameters parameters = 4;
    /// Stopping criteria
    /// NOTE(review): presumably an upper bound on the number of tokens to
    /// generate for this request; confirm against the consumer.
    uint32 max_new_tokens = 5;
}

/// A set of requests grouped under one batch ID.
message Batch {
    /// Batch ID
    uint64 id = 1;
    /// Individual requests
    repeated Request requests = 2;
    /// Batch size (==len(requests))
    uint32 size = 3;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
59
60
61
/// The generated output for one request.
message GeneratedText {
    /// Request
    Request request = 1;
    /// Output
    string output = 2;
    /// Number of generated tokens
    uint32 tokens = 3;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
68
69
70
/// Request message of the Generate RPC.
message GenerateRequest {
    /// Batch
    Batch batch = 1;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
73
74
75
76
77
/// Response message of the Generate RPC.
message GenerateResponse {
    /// Finished requests
    repeated GeneratedText generated_texts = 1;
    /// Next batch (cached)
    optional Batch batch = 2;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
80
81
82
83
/// Request message of the GenerateWithCache RPC.
message GenerateWithCacheRequest {

    /// The cached batches to generate tokens for.
    repeated Batch batches = 1;

}
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
84

Olivier Dehaene's avatar
Olivier Dehaene committed
85
86
87
88
89
90
/// Response message of the GenerateWithCache RPC.
message GenerateWithCacheResponse {

    /// Generated texts of the requests that finished.
    repeated GeneratedText generated_texts = 1;

    /// The next batch (cached); explicitly optional, so it may be unset.
    optional Batch batch = 2;

}