generate.proto 2.99 KB
Newer Older
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
1
2
3
4
syntax = "proto3";

package generate.v1;

Olivier Dehaene's avatar
Olivier Dehaene committed
5
/// RPC surface for shard discovery, batch-cache management and token generation.
service TextGenerationService {
    /// Service discovery: the response lists the URLs of the other shards
    rpc ServiceDiscovery (ServiceDiscoveryRequest) returns (ServiceDiscoveryResponse);
    /// Empties batch cache
    rpc ClearCache (ClearCacheRequest) returns (ClearCacheResponse);
    /// Generate tokens for a batch
    rpc Generate (GenerateRequest) returns (GenerateResponse);
    /// Generate tokens for a list of cached batches
    rpc GenerateWithCache (GenerateWithCacheRequest) returns (GenerateWithCacheResponse);
    /// Generate tokens until the text of at least one request of the batch is generated
    rpc GenerateUntilFinished (GenerateUntilFinishedRequest) returns (GenerateUntilFinishedResponse);
    /// Generate tokens until the text of at least one request of the cached batches is finished
    rpc GenerateUntilFinishedWithCache (GenerateUntilFinishedWithCacheRequest) returns (GenerateUntilFinishedWithCacheResponse);
}

Olivier Dehaene's avatar
Olivier Dehaene committed
20
21
22
/// Request of the ServiceDiscovery RPC; it carries no fields.
message ServiceDiscoveryRequest {}

Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
23
/// Response of the ServiceDiscovery RPC.
message ServiceDiscoveryResponse {
    /// URLs of the other shards
    repeated string urls = 1;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
28
29
30
31
32
33
/// Request of the ClearCache RPC; it carries no fields.
message ClearCacheRequest {}

/// Response of the ClearCache RPC; it carries no fields.
message ClearCacheResponse {}

Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
34
35
36
37
38
39
40
41
42
43
44
45
/// Logits warper parameters carried by each Request (its `parameters` field).
/// NOTE(review): the per-field notes below are inferred from the field names
/// only; confirm them against the server implementation.
message LogitsWarperParameters {
    /// Presumably the sampling temperature
    float temperature = 1;
    /// Presumably the top-k cutoff
    uint32 top_k = 2;
    /// Presumably the top-p (nucleus) threshold
    float top_p = 3;
    /// Presumably toggles sampling on or off
    bool do_sample = 4;
}

/// A single generation request.
message Request {
    /// Request ID
    uint64 id = 1;
    /// The generation context
    string inputs = 2;
    /// The number of tokens inside inputs
    uint32 input_length = 3;
    /// Logits Warper Parameters
    LogitsWarperParameters parameters = 4;
    /// Stopping criteria
    uint32 max_new_tokens = 5;
}

/// A group of requests handled together.
message Batch {
    /// Batch ID
    uint64 id = 1;
    /// Individual requests
    repeated Request requests = 2;
    /// Batch size (==len(requests))
    uint32 size = 3;
    /// Length of the longest sequence within the batch (used for padding)
    uint32 max_sequence_length = 4;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
65
66
67
/// Pairs a request with its output text.
message GeneratedText {
    /// Request
    Request request = 1;
    /// Output
    string output = 2;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
72
73
74
/// Request of the Generate RPC.
message GenerateRequest {
    /// Batch
    Batch batch = 1;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
77
78
79
80
81
/// Response of the Generate RPC.
message GenerateResponse {
    /// Finished requests
    repeated GeneratedText generated_texts = 1;
    /// Next batch (cached)
    optional Batch batch = 2;
}

Olivier Dehaene's avatar
Olivier Dehaene committed
84
85
86
87
/// Request of the GenerateWithCache RPC.
message GenerateWithCacheRequest {
    /// The cached batches
    repeated Batch batches = 1;
}
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
88

Olivier Dehaene's avatar
Olivier Dehaene committed
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/// Response of the GenerateWithCache RPC.
message GenerateWithCacheResponse {
    /// Requests that have finished
    repeated GeneratedText generated_texts = 1;
    /// The next batch (cached)
    optional Batch batch = 2;
}

/// Request of the GenerateUntilFinished RPC.
message GenerateUntilFinishedRequest {
    /// The batch
    Batch batch = 1;
}

/// Response of the GenerateUntilFinished RPC.
message GenerateUntilFinishedResponse {
    /// Requests that have finished
    repeated GeneratedText generated_texts = 1;
    /// The next batch (cached)
    optional Batch batch = 2;
}

/// Request of the GenerateUntilFinishedWithCache RPC.
message GenerateUntilFinishedWithCacheRequest {
    /// The cached batches
    repeated Batch batches = 1;
}

/// Response of the GenerateUntilFinishedWithCache RPC.
message GenerateUntilFinishedWithCacheResponse {
    /// Requests that have finished
    repeated GeneratedText generated_texts = 1;
    /// The next batch (cached)
    optional Batch batch = 2;
}