generate.proto 1.85 KB
Newer Older
Olivier Dehaene's avatar
Init  
Olivier Dehaene committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
// This file is written against the proto3 language version.
syntax = "proto3";

// Every service and message declared in this file is scoped to the
// `generate.v1` package.
package generate.v1;

// RPC service for batched text generation and management of the batch cache.
service TextGeneration {
    /// Service discovery: takes no input and answers with the URL list
    /// carried by ServiceDiscoveryResponse.
    rpc ServiceDiscovery(Empty) returns (ServiceDiscoveryResponse);

    /// Empties the batch cache; neither the request nor the reply carries data.
    rpc ClearCache(Empty) returns (Empty);

    /// Generates tokens for a batch without using the cache; the requests are
    /// sent in full inside the Batch message.
    rpc Generate(Batch) returns (Response);

    /// Generates tokens for a batch using the cache; the BatchCached message
    /// refers to requests and cache entries by ID only.
    rpc GenerateWithCache(BatchCached) returns (Response);
}

// Reply of the ServiceDiscovery RPC.
message ServiceDiscoveryResponse {
    /// URL strings returned by service discovery.
    /// NOTE(review): presumably the addresses of the servers that make up the
    /// service - confirm against the server implementation.
    repeated string urls = 1;
}

// Logits warper parameters attached to a Request (see Request.parameters).
//
// All four fields are plain proto3 scalars without the `optional` label, so a
// receiver cannot tell an unset field apart from an explicit 0 / false.
message LogitsWarperParameters {
    /// Temperature value.
    /// NOTE(review): presumably the softmax temperature applied to the
    /// logits - confirm.
    float temperature = 1;
    /// Top-k value.
    /// NOTE(review): presumably the number of highest-scoring tokens kept
    /// before sampling - confirm.
    uint32 top_k = 2;
    /// Top-p value.
    /// NOTE(review): presumably the cumulative probability threshold used for
    /// nucleus sampling - confirm.
    float top_p = 3;
    /// Sampling switch.
    /// NOTE(review): presumably false selects greedy decoding - confirm.
    bool do_sample = 4;
}

// A single generation request; Batch groups these in its `requests` field.
message Request {
    /// Request ID; FinishedGeneration.id refers back to this value.
    uint64 id = 1;
    /// The generation context, passed as text.
    string inputs = 2;
    /// Logits warper parameters for this request.
    LogitsWarperParameters parameters = 3;
    /// Stopping criteria.
    /// NOTE(review): presumably the upper bound on the number of tokens
    /// generated for this request - confirm.
    uint32 max_new_tokens = 4;
}

// Input of the Generate RPC: a batch that carries its requests in full.
message Batch {
    /// Batch ID; CacheEntry.id is documented as being the same value.
    uint64 id = 1;
    /// Individual requests that make up the batch.
    repeated Request requests = 2;
}

// Input of the GenerateWithCache RPC: identifies requests and cache entries
// by ID instead of carrying Request messages.
message BatchCached {
    /// Batch ID
    uint64 id = 1;
    /// IDs of the requests within the cache
    repeated uint64 request_ids = 2;
    /// Cache IDs.
    /// NOTE(review): presumably the CacheEntry ids this batch is built from -
    /// confirm.
    repeated uint64 batch_cached_ids = 3;
    /// Batch size (sum of all batch sizes)
    uint32 total_batch_size = 4;
    /// Max sequence length.
    /// NOTE(review): unit is presumably tokens - confirm.
    uint32 max_sequence_length = 5;
}

// Output for one finished request; carried in Response.finished.
message FinishedGeneration {
    /// ID of the original request (see Request.id)
    uint64 id = 1;
    /// Output, as text
    string output = 2;
}

// Description of a cache entry; carried in Response.cache_entry.
message CacheEntry {
    /// Cache ID; same as batch ID
    uint64 id = 1;
    /// IDs of the requests present in the cache entry
    repeated uint64 request_ids = 2;
    /// Sequence length.
    /// NOTE(review): unit is presumably tokens - confirm.
    uint32 sequence_length = 3;
}

// Reply shared by the Generate and GenerateWithCache RPCs.
message Response {
    /// Finished requests (optional). A repeated field has no presence, so
    /// "none" is expressed as an empty list.
    repeated FinishedGeneration finished = 1;
    /// Cache entry (optional). The field has explicit presence, so a receiver
    /// can check whether an entry was set.
    /// NOTE(review): message-typed fields already track presence in proto3,
    /// so the `optional` label looks redundant; dropping it would change the
    /// descriptor, so confirm with consumers before touching it.
    optional CacheEntry cache_entry = 2;
}


// Represents an empty message. Used as the request of the ServiceDiscovery and
// ClearCache RPCs and as the reply of ClearCache.
message Empty {}