Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0c3db889
Unverified
Commit
0c3db889
authored
Sep 26, 2025
by
Chang Su
Committed by
GitHub
Sep 26, 2025
Browse files
[router][grpc] Add helpfer functions for decoder in router.rs and fix specs (#10971)
parent
2bdaf482
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
307 additions
and
106 deletions
+307
-106
python/sglang/srt/grpc/sglang_scheduler.proto
python/sglang/srt/grpc/sglang_scheduler.proto
+6
-6
python/sglang/srt/grpc/sglang_scheduler_pb2.py
python/sglang/srt/grpc/sglang_scheduler_pb2.py
+68
-68
sgl-router/src/grpc_client/sglang_scheduler.rs
sgl-router/src/grpc_client/sglang_scheduler.rs
+9
-6
sgl-router/src/proto/sglang_scheduler.proto
sgl-router/src/proto/sglang_scheduler.proto
+5
-5
sgl-router/src/protocols/spec.rs
sgl-router/src/protocols/spec.rs
+3
-3
sgl-router/src/routers/grpc/router.rs
sgl-router/src/routers/grpc/router.rs
+216
-18
No files found.
python/sglang/srt/grpc/sglang_scheduler.proto
View file @
0c3db889
...
@@ -36,9 +36,9 @@ message SamplingParams {
...
@@ -36,9 +36,9 @@ message SamplingParams {
float
presence_penalty
=
6
;
float
presence_penalty
=
6
;
float
repetition_penalty
=
7
;
float
repetition_penalty
=
7
;
int32
max_new_tokens
=
8
;
optional
int32
max_new_tokens
=
8
;
repeated
string
stop
=
9
;
repeated
string
stop
=
9
;
repeated
int32
stop_token_ids
=
10
;
repeated
u
int32
stop_token_ids
=
10
;
bool
skip_special_tokens
=
11
;
bool
skip_special_tokens
=
11
;
bool
spaces_between_special_tokens
=
12
;
bool
spaces_between_special_tokens
=
12
;
...
@@ -98,7 +98,7 @@ message GenerateRequest {
...
@@ -98,7 +98,7 @@ message GenerateRequest {
bool
return_logprob
=
5
;
bool
return_logprob
=
5
;
int32
logprob_start_len
=
6
;
int32
logprob_start_len
=
6
;
int32
top_logprobs_num
=
7
;
int32
top_logprobs_num
=
7
;
repeated
int32
token_ids_logprob
=
8
;
repeated
u
int32
token_ids_logprob
=
8
;
bool
return_hidden_states
=
9
;
bool
return_hidden_states
=
9
;
// For disaggregated serving
// For disaggregated serving
...
@@ -129,7 +129,7 @@ message GenerateRequest {
...
@@ -129,7 +129,7 @@ message GenerateRequest {
message
TokenizedInput
{
message
TokenizedInput
{
string
original_text
=
1
;
// For reference
string
original_text
=
1
;
// For reference
repeated
int32
input_ids
=
2
;
repeated
u
int32
input_ids
=
2
;
}
}
message
MultimodalInputs
{
message
MultimodalInputs
{
...
@@ -167,7 +167,7 @@ message GenerateResponse {
...
@@ -167,7 +167,7 @@ message GenerateResponse {
message
GenerateStreamChunk
{
message
GenerateStreamChunk
{
// Generated tokens (incremental chunk)
// Generated tokens (incremental chunk)
repeated
int32
token_ids
=
1
;
repeated
u
int32
token_ids
=
1
;
// Cumulative counts
// Cumulative counts
int32
prompt_tokens
=
2
;
int32
prompt_tokens
=
2
;
...
@@ -183,7 +183,7 @@ message GenerateStreamChunk {
...
@@ -183,7 +183,7 @@ message GenerateStreamChunk {
message
GenerateComplete
{
message
GenerateComplete
{
// Final output
// Final output
repeated
int32
output_ids
=
1
;
repeated
u
int32
output_ids
=
1
;
// Finish reason
// Finish reason
enum
FinishReason
{
enum
FinishReason
{
...
...
python/sglang/srt/grpc/sglang_scheduler_pb2.py
View file @
0c3db889
...
@@ -29,7 +29,7 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__
...
@@ -29,7 +29,7 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__
from
google.protobuf
import
struct_pb2
as
google_dot_protobuf_dot_struct__pb2
from
google.protobuf
import
struct_pb2
as
google_dot_protobuf_dot_struct__pb2
DESCRIPTOR
=
_descriptor_pool
.
Default
().
AddSerializedFile
(
b
'
\n\x16
sglang_scheduler.proto
\x12\x15
sglang.grpc.scheduler
\x1a\x1f
google/protobuf/timestamp.proto
\x1a\x1c
google/protobuf/struct.proto
\"\x
c9
\x05\n\x0e
SamplingParams
\x12\x13\n\x0b
temperature
\x18\x01
\x01
(
\x02\x12\r\n\x05
top_p
\x18\x02
\x01
(
\x02\x12\r\n\x05
top_k
\x18\x03
\x01
(
\x05\x12\r\n\x05
min_p
\x18\x04
\x01
(
\x02\x12\x19\n\x11\x66
requency_penalty
\x18\x05
\x01
(
\x02\x12\x18\n\x10
presence_penalty
\x18\x06
\x01
(
\x02\x12\x1a\n\x12
repetition_penalty
\x18\x07
\x01
(
\x02\x12\x1
6
\n\x0e
max_new_tokens
\x18\x08
\x01
(
\x05\x12\x0c\n\x04
stop
\x18\t
\x03
(
\t\x12\x16\n\x0e
stop_token_ids
\x18\n
\x03
(
\
x05
\x12\x1b\n\x13
skip_special_tokens
\x18\x0b
\x01
(
\x08\x12
%
\n\x1d
spaces_between_special_tokens
\x18\x0c
\x01
(
\x08\x12\x0f\n\x05
regex
\x18\r
\x01
(
\t
H
\x00\x12\x15\n\x0b
json_schema
\x18\x0e
\x01
(
\t
H
\x00\x12\x16\n\x0c\x65\x62
nf_grammar
\x18\x0f
\x01
(
\t
H
\x00\x12\x18\n\x0e
structural_tag
\x18\x10
\x01
(
\t
H
\x00\x12\x11\n\t
lora_path
\x18\x11
\x01
(
\t\x12\t\n\x01
n
\x18\x12
\x01
(
\x05\x12\x15\n\r
token_healing
\x18\x13
\x01
(
\x08\x12\x16\n\x0e
min_new_tokens
\x18\x14
\x01
(
\x05\x12\x12\n\n
ignore_eos
\x18\x15
\x01
(
\x08\x12\x14\n\x0c
no_stop_trim
\x18\x16
\x01
(
\x08\x12\x17\n\x0f
stream_interval
\x18\x17
\x01
(
\x05\x12
H
\n\n
logit_bias
\x18\x18
\x03
(
\x0b\x32\x34
.sglang.grpc.scheduler.SamplingParams.LogitBiasEntry
\x12
.
\n\r
custom_params
\x18\x19
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\x1a\x30\n\x0e
LogitBiasEntry
\x12\x0b\n\x03
key
\x18\x01
\x01
(
\t\x12\r\n\x05
value
\x18\x02
\x01
(
\x02
:
\x02\x38\x01\x42\x0c\n\n
constraint
\"
]
\n\x13\x44
isaggregatedParams
\x12\x16\n\x0e\x62
ootstrap_host
\x18\x01
\x01
(
\t\x12\x16\n\x0e\x62
ootstrap_port
\x18\x02
\x01
(
\x05\x12\x16\n\x0e\x62
ootstrap_room
\x18\x03
\x01
(
\x05\"\xf9\x04\n\x0f
GenerateRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\t
tokenized
\x18\x02
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\x12
:
\n\t
mm_inputs
\x18\x03
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.MultimodalInputs
\x12
>
\n\x0f
sampling_params
\x18\x04
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.SamplingParams
\x12\x16\n\x0e
return_logprob
\x18\x05
\x01
(
\x08\x12\x19\n\x11
logprob_start_len
\x18\x06
\x01
(
\x05\x12\x18\n\x10
top_logprobs_num
\x18\x07
\x01
(
\x05\x12\x19\n\x11
token_ids_logprob
\x18\x08
\x03
(
\
x05
\x12\x1c\n\x14
return_hidden_states
\x18\t
\x01
(
\x08\x12
H
\n\x14\x64
isaggregated_params
\x18\n
\x01
(
\x0b\x32
*.sglang.grpc.scheduler.DisaggregatedParams
\x12\x1e\n\x16\x63
ustom_logit_processor
\x18\x0b
\x01
(
\t\x12
-
\n\t
timestamp
\x18\x0c
\x01
(
\x0b\x32\x1a
.google.protobuf.Timestamp
\x12\x13\n\x0b
log_metrics
\x18\r
\x01
(
\x08\x12\x14\n\x0c
input_embeds
\x18\x0e
\x03
(
\x02\x12\x0f\n\x07
lora_id
\x18\x0f
\x01
(
\t\x12\x1a\n\x12\x64\x61
ta_parallel_rank
\x18\x10
\x01
(
\x05\x12\x15\n\r
dp_balance_id
\x18\x11
\x01
(
\x05\x12\x0e\n\x06
stream
\x18\x12
\x01
(
\x08\"
:
\n\x0e
TokenizedInput
\x12\x15\n\r
original_text
\x18\x01
\x01
(
\t\x12\x11\n\t
input_ids
\x18\x02
\x03
(
\
x05
\"\xd3\x01\n\x10
MultimodalInputs
\x12\x12\n\n
image_urls
\x18\x01
\x03
(
\t\x12\x12\n\n
video_urls
\x18\x02
\x03
(
\t\x12\x12\n\n
audio_urls
\x18\x03
\x03
(
\t\x12\x33\n\x12
processed_features
\x18\x04
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\x12\x12\n\n
image_data
\x18\x05
\x03
(
\x0c\x12\x12\n\n
video_data
\x18\x06
\x03
(
\x0c\x12\x12\n\n
audio_data
\x18\x07
\x03
(
\x0c\x12\x12\n\n
modalities
\x18\x08
\x03
(
\t\"\xe3\x01\n\x10
GenerateResponse
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12
;
\n\x05\x63
hunk
\x18\x02
\x01
(
\x0b\x32
*.sglang.grpc.scheduler.GenerateStreamChunkH
\x00\x12
;
\n\x08\x63
omplete
\x18\x03
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.GenerateCompleteH
\x00\x12\x35\n\x05\x65
rror
\x18\x04
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.GenerateErrorH
\x00\x42\n\n\x08
response
\"\xbb\x01\n\x13
GenerateStreamChunk
\x12\x11\n\t
token_ids
\x18\x01
\x03
(
\
x05
\x12\x15\n\r
prompt_tokens
\x18\x02
\x01
(
\x05\x12\x19\n\x11\x63
ompletion_tokens
\x18\x03
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x04
\x01
(
\x05\x12\x31\n\x08
logprobs
\x18\x05
\x01
(
\x0b\x32\x1f
.sglang.grpc.scheduler.LogProbs
\x12\x15\n\r
hidden_states
\x18\x06
\x03
(
\x02\"\x81\x03\n\x10
GenerateComplete
\x12\x12\n\n
output_ids
\x18\x01
\x03
(
\
x05
\x12
K
\n\r
finish_reason
\x18\x02
\x01
(
\x0e\x32\x34
.sglang.grpc.scheduler.GenerateComplete.FinishReason
\x12\x15\n\r
prompt_tokens
\x18\x03
\x01
(
\x05\x12\x19\n\x11\x63
ompletion_tokens
\x18\x04
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x05
\x01
(
\x05\x12\x35\n\x0c\x61
ll_logprobs
\x18\x06
\x03
(
\x0b\x32\x1f
.sglang.grpc.scheduler.LogProbs
\x12
>
\n\x11\x61
ll_hidden_states
\x18\x07
\x03
(
\x0b\x32
#.sglang.grpc.scheduler.HiddenStates
\"
L
\n\x0c\x46
inishReason
\x12\x08\n\x04
STOP
\x10\x00\x12\n\n\x06
LENGTH
\x10\x01\x12\r\n\t
EOS_TOKEN
\x10\x02\x12\x0c\n\x08
STOP_STR
\x10\x03\x12\t\n\x05\x41\x42
ORT
\x10\x04\"
K
\n\r
GenerateError
\x12\x0f\n\x07
message
\x18\x01
\x01
(
\t\x12\x18\n\x10
http_status_code
\x18\x02
\x01
(
\t\x12\x0f\n\x07\x64\x65
tails
\x18\x03
\x01
(
\t\"\x84\x01\n\x08
LogProbs
\x12\x16\n\x0e
token_logprobs
\x18\x01
\x03
(
\x02\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\x12\x38\n\x0c
top_logprobs
\x18\x03
\x03
(
\x0b\x32\"
.sglang.grpc.scheduler.TopLogProbs
\x12\x13\n\x0b
token_texts
\x18\x04
\x03
(
\t\"
E
\n\x0b
TopLogProbs
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\x12\x13\n\x0b
token_texts
\x18\x03
\x03
(
\t\"
?
\n\x0c
HiddenStates
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\r\n\x05
layer
\x18\x02
\x01
(
\x05\x12\x10\n\x08
position
\x18\x03
\x01
(
\x05\"\xca\x02\n\x0c\x45
mbedRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\t
tokenized
\x18\x02
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\x12
:
\n\t
mm_inputs
\x18\x04
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.MultimodalInputs
\x12
>
\n\x0f
sampling_params
\x18\x05
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.SamplingParams
\x12\x13\n\x0b
log_metrics
\x18\x06
\x01
(
\x08\x12\x16\n\x0e
token_type_ids
\x18\x07
\x03
(
\x05\x12\x1a\n\x12\x64\x61
ta_parallel_rank
\x18\x08
\x01
(
\x05\x12\x18\n\x10
is_cross_encoder
\x18\t
\x01
(
\x08\x12\r\n\x05
texts
\x18\n
\x03
(
\t\"\x9d\x01\n\r
EmbedResponse
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\x08\x63
omplete
\x18\x02
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.EmbedCompleteH
\x00\x12\x32\n\x05\x65
rror
\x18\x03
\x01
(
\x0b\x32
!.sglang.grpc.scheduler.EmbedErrorH
\x00\x42\n\n\x08
response
\"\xa3\x01\n\r
EmbedComplete
\x12\x11\n\t
embedding
\x18\x01
\x03
(
\x02\x12\x15\n\r
prompt_tokens
\x18\x02
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x03
\x01
(
\x05\x12\x15\n\r
embedding_dim
\x18\x04
\x01
(
\x05\x12
:
\n\x10\x62\x61
tch_embeddings
\x18\x05
\x03
(
\x0b\x32
.sglang.grpc.scheduler.Embedding
\"
*
\n\t
Embedding
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\r\n\x05
index
\x18\x02
\x01
(
\x05\"
<
\n\n
EmbedError
\x12\x0f\n\x07
message
\x18\x01
\x01
(
\t\x12\x0c\n\x04\x63
ode
\x18\x02
\x01
(
\t\x12\x0f\n\x07\x64\x65
tails
\x18\x03
\x01
(
\t\"
N
\n\x12
HealthCheckRequest
\x12\x38\n\t
tokenized
\x18\x01
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\"
7
\n\x13
HealthCheckResponse
\x12\x0f\n\x07
healthy
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
2
\n\x0c\x41\x62
ortRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x0e\n\x06
reason
\x18\x02
\x01
(
\t\"
1
\n\r
AbortResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
I
\n\x0f
LoadLoRARequest
\x12\x12\n\n
adapter_id
\x18\x01
\x01
(
\t\x12\x14\n\x0c\x61\x64\x61
pter_path
\x18\x02
\x01
(
\t\x12\x0c\n\x04
rank
\x18\x03
\x01
(
\x05\"
H
\n\x10
LoadLoRAResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x12\n\n
adapter_id
\x18\x02
\x01
(
\t\x12\x0f\n\x07
message
\x18\x03
\x01
(
\t\"\'\n\x11
UnloadLoRARequest
\x12\x12\n\n
adapter_id
\x18\x01
\x01
(
\t\"
6
\n\x12
UnloadLoRAResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
w
\n\x14
UpdateWeightsRequest
\x12\x13\n\t
disk_path
\x18\x01
\x01
(
\t
H
\x00\x12\x15\n\x0b
tensor_data
\x18\x02
\x01
(
\x0c
H
\x00\x12\x14\n\n
remote_url
\x18\x03
\x01
(
\t
H
\x00\x12\x13\n\x0b
weight_name
\x18\x04
\x01
(
\t
B
\x08\n\x06
source
\"
9
\n\x15
UpdateWeightsResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
-
\n\x17
GetInternalStateRequest
\x12\x12\n\n
state_keys
\x18\x01
\x03
(
\t\"
B
\n\x18
GetInternalStateResponse
\x12
&
\n\x05
state
\x18\x01
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\"
A
\n\x17
SetInternalStateRequest
\x12
&
\n\x05
state
\x18\x01
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\"
<
\n\x18
SetInternalStateResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t
2
\xfe\x02\n\x0f
SglangScheduler
\x12
]
\n\x08
Generate
\x12
&.sglang.grpc.scheduler.GenerateRequest
\x1a\'
.sglang.grpc.scheduler.GenerateResponse0
\x01\x12
R
\n\x05\x45
mbed
\x12
#.sglang.grpc.scheduler.EmbedRequest
\x1a
$.sglang.grpc.scheduler.EmbedResponse
\x12\x64\n\x0b
HealthCheck
\x12
).sglang.grpc.scheduler.HealthCheckRequest
\x1a
*.sglang.grpc.scheduler.HealthCheckResponse
\x12
R
\n\x05\x41\x62
ort
\x12
#.sglang.grpc.scheduler.AbortRequest
\x1a
$.sglang.grpc.scheduler.AbortResponseb
\x06
proto3'
)
DESCRIPTOR
=
_descriptor_pool
.
Default
().
AddSerializedFile
(
b
'
\n\x16
sglang_scheduler.proto
\x12\x15
sglang.grpc.scheduler
\x1a\x1f
google/protobuf/timestamp.proto
\x1a\x1c
google/protobuf/struct.proto
\"\x
e1
\x05\n\x0e
SamplingParams
\x12\x13\n\x0b
temperature
\x18\x01
\x01
(
\x02\x12\r\n\x05
top_p
\x18\x02
\x01
(
\x02\x12\r\n\x05
top_k
\x18\x03
\x01
(
\x05\x12\r\n\x05
min_p
\x18\x04
\x01
(
\x02\x12\x19\n\x11\x66
requency_penalty
\x18\x05
\x01
(
\x02\x12\x18\n\x10
presence_penalty
\x18\x06
\x01
(
\x02\x12\x1a\n\x12
repetition_penalty
\x18\x07
\x01
(
\x02\x12\x1
b
\n\x0e
max_new_tokens
\x18\x08
\x01
(
\x05
H
\x01\x88\x01\x01
\x12\x0c\n\x04
stop
\x18\t
\x03
(
\t\x12\x16\n\x0e
stop_token_ids
\x18\n
\x03
(
\
r
\x12\x1b\n\x13
skip_special_tokens
\x18\x0b
\x01
(
\x08\x12
%
\n\x1d
spaces_between_special_tokens
\x18\x0c
\x01
(
\x08\x12\x0f\n\x05
regex
\x18\r
\x01
(
\t
H
\x00\x12\x15\n\x0b
json_schema
\x18\x0e
\x01
(
\t
H
\x00\x12\x16\n\x0c\x65\x62
nf_grammar
\x18\x0f
\x01
(
\t
H
\x00\x12\x18\n\x0e
structural_tag
\x18\x10
\x01
(
\t
H
\x00\x12\x11\n\t
lora_path
\x18\x11
\x01
(
\t\x12\t\n\x01
n
\x18\x12
\x01
(
\x05\x12\x15\n\r
token_healing
\x18\x13
\x01
(
\x08\x12\x16\n\x0e
min_new_tokens
\x18\x14
\x01
(
\x05\x12\x12\n\n
ignore_eos
\x18\x15
\x01
(
\x08\x12\x14\n\x0c
no_stop_trim
\x18\x16
\x01
(
\x08\x12\x17\n\x0f
stream_interval
\x18\x17
\x01
(
\x05\x12
H
\n\n
logit_bias
\x18\x18
\x03
(
\x0b\x32\x34
.sglang.grpc.scheduler.SamplingParams.LogitBiasEntry
\x12
.
\n\r
custom_params
\x18\x19
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\x1a\x30\n\x0e
LogitBiasEntry
\x12\x0b\n\x03
key
\x18\x01
\x01
(
\t\x12\r\n\x05
value
\x18\x02
\x01
(
\x02
:
\x02\x38\x01\x42\x0c\n\n
constraint
B
\x11\n\x0f
_max_new_tokens
\"
]
\n\x13\x44
isaggregatedParams
\x12\x16\n\x0e\x62
ootstrap_host
\x18\x01
\x01
(
\t\x12\x16\n\x0e\x62
ootstrap_port
\x18\x02
\x01
(
\x05\x12\x16\n\x0e\x62
ootstrap_room
\x18\x03
\x01
(
\x05\"\xf9\x04\n\x0f
GenerateRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\t
tokenized
\x18\x02
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\x12
:
\n\t
mm_inputs
\x18\x03
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.MultimodalInputs
\x12
>
\n\x0f
sampling_params
\x18\x04
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.SamplingParams
\x12\x16\n\x0e
return_logprob
\x18\x05
\x01
(
\x08\x12\x19\n\x11
logprob_start_len
\x18\x06
\x01
(
\x05\x12\x18\n\x10
top_logprobs_num
\x18\x07
\x01
(
\x05\x12\x19\n\x11
token_ids_logprob
\x18\x08
\x03
(
\
r
\x12\x1c\n\x14
return_hidden_states
\x18\t
\x01
(
\x08\x12
H
\n\x14\x64
isaggregated_params
\x18\n
\x01
(
\x0b\x32
*.sglang.grpc.scheduler.DisaggregatedParams
\x12\x1e\n\x16\x63
ustom_logit_processor
\x18\x0b
\x01
(
\t\x12
-
\n\t
timestamp
\x18\x0c
\x01
(
\x0b\x32\x1a
.google.protobuf.Timestamp
\x12\x13\n\x0b
log_metrics
\x18\r
\x01
(
\x08\x12\x14\n\x0c
input_embeds
\x18\x0e
\x03
(
\x02\x12\x0f\n\x07
lora_id
\x18\x0f
\x01
(
\t\x12\x1a\n\x12\x64\x61
ta_parallel_rank
\x18\x10
\x01
(
\x05\x12\x15\n\r
dp_balance_id
\x18\x11
\x01
(
\x05\x12\x0e\n\x06
stream
\x18\x12
\x01
(
\x08\"
:
\n\x0e
TokenizedInput
\x12\x15\n\r
original_text
\x18\x01
\x01
(
\t\x12\x11\n\t
input_ids
\x18\x02
\x03
(
\
r
\"\xd3\x01\n\x10
MultimodalInputs
\x12\x12\n\n
image_urls
\x18\x01
\x03
(
\t\x12\x12\n\n
video_urls
\x18\x02
\x03
(
\t\x12\x12\n\n
audio_urls
\x18\x03
\x03
(
\t\x12\x33\n\x12
processed_features
\x18\x04
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\x12\x12\n\n
image_data
\x18\x05
\x03
(
\x0c\x12\x12\n\n
video_data
\x18\x06
\x03
(
\x0c\x12\x12\n\n
audio_data
\x18\x07
\x03
(
\x0c\x12\x12\n\n
modalities
\x18\x08
\x03
(
\t\"\xe3\x01\n\x10
GenerateResponse
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12
;
\n\x05\x63
hunk
\x18\x02
\x01
(
\x0b\x32
*.sglang.grpc.scheduler.GenerateStreamChunkH
\x00\x12
;
\n\x08\x63
omplete
\x18\x03
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.GenerateCompleteH
\x00\x12\x35\n\x05\x65
rror
\x18\x04
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.GenerateErrorH
\x00\x42\n\n\x08
response
\"\xbb\x01\n\x13
GenerateStreamChunk
\x12\x11\n\t
token_ids
\x18\x01
\x03
(
\
r
\x12\x15\n\r
prompt_tokens
\x18\x02
\x01
(
\x05\x12\x19\n\x11\x63
ompletion_tokens
\x18\x03
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x04
\x01
(
\x05\x12\x31\n\x08
logprobs
\x18\x05
\x01
(
\x0b\x32\x1f
.sglang.grpc.scheduler.LogProbs
\x12\x15\n\r
hidden_states
\x18\x06
\x03
(
\x02\"\x81\x03\n\x10
GenerateComplete
\x12\x12\n\n
output_ids
\x18\x01
\x03
(
\
r
\x12
K
\n\r
finish_reason
\x18\x02
\x01
(
\x0e\x32\x34
.sglang.grpc.scheduler.GenerateComplete.FinishReason
\x12\x15\n\r
prompt_tokens
\x18\x03
\x01
(
\x05\x12\x19\n\x11\x63
ompletion_tokens
\x18\x04
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x05
\x01
(
\x05\x12\x35\n\x0c\x61
ll_logprobs
\x18\x06
\x03
(
\x0b\x32\x1f
.sglang.grpc.scheduler.LogProbs
\x12
>
\n\x11\x61
ll_hidden_states
\x18\x07
\x03
(
\x0b\x32
#.sglang.grpc.scheduler.HiddenStates
\"
L
\n\x0c\x46
inishReason
\x12\x08\n\x04
STOP
\x10\x00\x12\n\n\x06
LENGTH
\x10\x01\x12\r\n\t
EOS_TOKEN
\x10\x02\x12\x0c\n\x08
STOP_STR
\x10\x03\x12\t\n\x05\x41\x42
ORT
\x10\x04\"
K
\n\r
GenerateError
\x12\x0f\n\x07
message
\x18\x01
\x01
(
\t\x12\x18\n\x10
http_status_code
\x18\x02
\x01
(
\t\x12\x0f\n\x07\x64\x65
tails
\x18\x03
\x01
(
\t\"\x84\x01\n\x08
LogProbs
\x12\x16\n\x0e
token_logprobs
\x18\x01
\x03
(
\x02\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\x12\x38\n\x0c
top_logprobs
\x18\x03
\x03
(
\x0b\x32\"
.sglang.grpc.scheduler.TopLogProbs
\x12\x13\n\x0b
token_texts
\x18\x04
\x03
(
\t\"
E
\n\x0b
TopLogProbs
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\x12\x13\n\x0b
token_texts
\x18\x03
\x03
(
\t\"
?
\n\x0c
HiddenStates
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\r\n\x05
layer
\x18\x02
\x01
(
\x05\x12\x10\n\x08
position
\x18\x03
\x01
(
\x05\"\xca\x02\n\x0c\x45
mbedRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\t
tokenized
\x18\x02
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\x12
:
\n\t
mm_inputs
\x18\x04
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.MultimodalInputs
\x12
>
\n\x0f
sampling_params
\x18\x05
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.SamplingParams
\x12\x13\n\x0b
log_metrics
\x18\x06
\x01
(
\x08\x12\x16\n\x0e
token_type_ids
\x18\x07
\x03
(
\x05\x12\x1a\n\x12\x64\x61
ta_parallel_rank
\x18\x08
\x01
(
\x05\x12\x18\n\x10
is_cross_encoder
\x18\t
\x01
(
\x08\x12\r\n\x05
texts
\x18\n
\x03
(
\t\"\x9d\x01\n\r
EmbedResponse
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\x08\x63
omplete
\x18\x02
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.EmbedCompleteH
\x00\x12\x32\n\x05\x65
rror
\x18\x03
\x01
(
\x0b\x32
!.sglang.grpc.scheduler.EmbedErrorH
\x00\x42\n\n\x08
response
\"\xa3\x01\n\r
EmbedComplete
\x12\x11\n\t
embedding
\x18\x01
\x03
(
\x02\x12\x15\n\r
prompt_tokens
\x18\x02
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x03
\x01
(
\x05\x12\x15\n\r
embedding_dim
\x18\x04
\x01
(
\x05\x12
:
\n\x10\x62\x61
tch_embeddings
\x18\x05
\x03
(
\x0b\x32
.sglang.grpc.scheduler.Embedding
\"
*
\n\t
Embedding
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\r\n\x05
index
\x18\x02
\x01
(
\x05\"
<
\n\n
EmbedError
\x12\x0f\n\x07
message
\x18\x01
\x01
(
\t\x12\x0c\n\x04\x63
ode
\x18\x02
\x01
(
\t\x12\x0f\n\x07\x64\x65
tails
\x18\x03
\x01
(
\t\"
N
\n\x12
HealthCheckRequest
\x12\x38\n\t
tokenized
\x18\x01
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\"
7
\n\x13
HealthCheckResponse
\x12\x0f\n\x07
healthy
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
2
\n\x0c\x41\x62
ortRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x0e\n\x06
reason
\x18\x02
\x01
(
\t\"
1
\n\r
AbortResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
I
\n\x0f
LoadLoRARequest
\x12\x12\n\n
adapter_id
\x18\x01
\x01
(
\t\x12\x14\n\x0c\x61\x64\x61
pter_path
\x18\x02
\x01
(
\t\x12\x0c\n\x04
rank
\x18\x03
\x01
(
\x05\"
H
\n\x10
LoadLoRAResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x12\n\n
adapter_id
\x18\x02
\x01
(
\t\x12\x0f\n\x07
message
\x18\x03
\x01
(
\t\"\'\n\x11
UnloadLoRARequest
\x12\x12\n\n
adapter_id
\x18\x01
\x01
(
\t\"
6
\n\x12
UnloadLoRAResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
w
\n\x14
UpdateWeightsRequest
\x12\x13\n\t
disk_path
\x18\x01
\x01
(
\t
H
\x00\x12\x15\n\x0b
tensor_data
\x18\x02
\x01
(
\x0c
H
\x00\x12\x14\n\n
remote_url
\x18\x03
\x01
(
\t
H
\x00\x12\x13\n\x0b
weight_name
\x18\x04
\x01
(
\t
B
\x08\n\x06
source
\"
9
\n\x15
UpdateWeightsResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
-
\n\x17
GetInternalStateRequest
\x12\x12\n\n
state_keys
\x18\x01
\x03
(
\t\"
B
\n\x18
GetInternalStateResponse
\x12
&
\n\x05
state
\x18\x01
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\"
A
\n\x17
SetInternalStateRequest
\x12
&
\n\x05
state
\x18\x01
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\"
<
\n\x18
SetInternalStateResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t
2
\xfe\x02\n\x0f
SglangScheduler
\x12
]
\n\x08
Generate
\x12
&.sglang.grpc.scheduler.GenerateRequest
\x1a\'
.sglang.grpc.scheduler.GenerateResponse0
\x01\x12
R
\n\x05\x45
mbed
\x12
#.sglang.grpc.scheduler.EmbedRequest
\x1a
$.sglang.grpc.scheduler.EmbedResponse
\x12\x64\n\x0b
HealthCheck
\x12
).sglang.grpc.scheduler.HealthCheckRequest
\x1a
*.sglang.grpc.scheduler.HealthCheckResponse
\x12
R
\n\x05\x41\x62
ort
\x12
#.sglang.grpc.scheduler.AbortRequest
\x1a
$.sglang.grpc.scheduler.AbortResponseb
\x06
proto3'
)
_globals
=
globals
()
_globals
=
globals
()
_builder
.
BuildMessageAndEnumDescriptors
(
DESCRIPTOR
,
_globals
)
_builder
.
BuildMessageAndEnumDescriptors
(
DESCRIPTOR
,
_globals
)
...
@@ -39,71 +39,71 @@ if not _descriptor._USE_C_DESCRIPTORS:
...
@@ -39,71 +39,71 @@ if not _descriptor._USE_C_DESCRIPTORS:
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_loaded_options
=
None
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_loaded_options
=
None
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_options
=
b
'8
\001
'
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_options
=
b
'8
\001
'
_globals
[
'_SAMPLINGPARAMS'
].
_serialized_start
=
113
_globals
[
'_SAMPLINGPARAMS'
].
_serialized_start
=
113
_globals
[
'_SAMPLINGPARAMS'
].
_serialized_end
=
8
26
_globals
[
'_SAMPLINGPARAMS'
].
_serialized_end
=
8
50
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_start
=
76
4
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_start
=
76
9
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_end
=
81
2
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_end
=
81
7
_globals
[
'_DISAGGREGATEDPARAMS'
].
_serialized_start
=
82
8
_globals
[
'_DISAGGREGATEDPARAMS'
].
_serialized_start
=
8
5
2
_globals
[
'_DISAGGREGATEDPARAMS'
].
_serialized_end
=
9
21
_globals
[
'_DISAGGREGATEDPARAMS'
].
_serialized_end
=
9
45
_globals
[
'_GENERATEREQUEST'
].
_serialized_start
=
9
2
4
_globals
[
'_GENERATEREQUEST'
].
_serialized_start
=
94
8
_globals
[
'_GENERATEREQUEST'
].
_serialized_end
=
15
57
_globals
[
'_GENERATEREQUEST'
].
_serialized_end
=
15
81
_globals
[
'_TOKENIZEDINPUT'
].
_serialized_start
=
15
59
_globals
[
'_TOKENIZEDINPUT'
].
_serialized_start
=
15
83
_globals
[
'_TOKENIZEDINPUT'
].
_serialized_end
=
161
7
_globals
[
'_TOKENIZEDINPUT'
].
_serialized_end
=
16
4
1
_globals
[
'_MULTIMODALINPUTS'
].
_serialized_start
=
16
20
_globals
[
'_MULTIMODALINPUTS'
].
_serialized_start
=
16
44
_globals
[
'_MULTIMODALINPUTS'
].
_serialized_end
=
18
31
_globals
[
'_MULTIMODALINPUTS'
].
_serialized_end
=
18
55
_globals
[
'_GENERATERESPONSE'
].
_serialized_start
=
18
34
_globals
[
'_GENERATERESPONSE'
].
_serialized_start
=
18
58
_globals
[
'_GENERATERESPONSE'
].
_serialized_end
=
20
61
_globals
[
'_GENERATERESPONSE'
].
_serialized_end
=
20
85
_globals
[
'_GENERATESTREAMCHUNK'
].
_serialized_start
=
20
64
_globals
[
'_GENERATESTREAMCHUNK'
].
_serialized_start
=
20
88
_globals
[
'_GENERATESTREAMCHUNK'
].
_serialized_end
=
225
1
_globals
[
'_GENERATESTREAMCHUNK'
].
_serialized_end
=
22
7
5
_globals
[
'_GENERATECOMPLETE'
].
_serialized_start
=
22
54
_globals
[
'_GENERATECOMPLETE'
].
_serialized_start
=
22
78
_globals
[
'_GENERATECOMPLETE'
].
_serialized_end
=
263
9
_globals
[
'_GENERATECOMPLETE'
].
_serialized_end
=
26
6
3
_globals
[
'_GENERATECOMPLETE_FINISHREASON'
].
_serialized_start
=
25
63
_globals
[
'_GENERATECOMPLETE_FINISHREASON'
].
_serialized_start
=
25
87
_globals
[
'_GENERATECOMPLETE_FINISHREASON'
].
_serialized_end
=
263
9
_globals
[
'_GENERATECOMPLETE_FINISHREASON'
].
_serialized_end
=
26
6
3
_globals
[
'_GENERATEERROR'
].
_serialized_start
=
26
41
_globals
[
'_GENERATEERROR'
].
_serialized_start
=
26
65
_globals
[
'_GENERATEERROR'
].
_serialized_end
=
27
16
_globals
[
'_GENERATEERROR'
].
_serialized_end
=
27
40
_globals
[
'_LOGPROBS'
].
_serialized_start
=
27
19
_globals
[
'_LOGPROBS'
].
_serialized_start
=
27
43
_globals
[
'_LOGPROBS'
].
_serialized_end
=
285
1
_globals
[
'_LOGPROBS'
].
_serialized_end
=
28
7
5
_globals
[
'_TOPLOGPROBS'
].
_serialized_start
=
28
53
_globals
[
'_TOPLOGPROBS'
].
_serialized_start
=
28
77
_globals
[
'_TOPLOGPROBS'
].
_serialized_end
=
29
22
_globals
[
'_TOPLOGPROBS'
].
_serialized_end
=
29
46
_globals
[
'_HIDDENSTATES'
].
_serialized_start
=
29
2
4
_globals
[
'_HIDDENSTATES'
].
_serialized_start
=
294
8
_globals
[
'_HIDDENSTATES'
].
_serialized_end
=
2987
_globals
[
'_HIDDENSTATES'
].
_serialized_end
=
3011
_globals
[
'_EMBEDREQUEST'
].
_serialized_start
=
2990
_globals
[
'_EMBEDREQUEST'
].
_serialized_start
=
3014
_globals
[
'_EMBEDREQUEST'
].
_serialized_end
=
33
20
_globals
[
'_EMBEDREQUEST'
].
_serialized_end
=
33
44
_globals
[
'_EMBEDRESPONSE'
].
_serialized_start
=
33
23
_globals
[
'_EMBEDRESPONSE'
].
_serialized_start
=
33
47
_globals
[
'_EMBEDRESPONSE'
].
_serialized_end
=
3
480
_globals
[
'_EMBEDRESPONSE'
].
_serialized_end
=
3
504
_globals
[
'_EMBEDCOMPLETE'
].
_serialized_start
=
3
483
_globals
[
'_EMBEDCOMPLETE'
].
_serialized_start
=
3
507
_globals
[
'_EMBEDCOMPLETE'
].
_serialized_end
=
36
46
_globals
[
'_EMBEDCOMPLETE'
].
_serialized_end
=
36
70
_globals
[
'_EMBEDDING'
].
_serialized_start
=
36
48
_globals
[
'_EMBEDDING'
].
_serialized_start
=
36
72
_globals
[
'_EMBEDDING'
].
_serialized_end
=
3
690
_globals
[
'_EMBEDDING'
].
_serialized_end
=
3
714
_globals
[
'_EMBEDERROR'
].
_serialized_start
=
3
692
_globals
[
'_EMBEDERROR'
].
_serialized_start
=
3
716
_globals
[
'_EMBEDERROR'
].
_serialized_end
=
37
52
_globals
[
'_EMBEDERROR'
].
_serialized_end
=
37
76
_globals
[
'_HEALTHCHECKREQUEST'
].
_serialized_start
=
37
54
_globals
[
'_HEALTHCHECKREQUEST'
].
_serialized_start
=
37
78
_globals
[
'_HEALTHCHECKREQUEST'
].
_serialized_end
=
38
32
_globals
[
'_HEALTHCHECKREQUEST'
].
_serialized_end
=
38
56
_globals
[
'_HEALTHCHECKRESPONSE'
].
_serialized_start
=
38
34
_globals
[
'_HEALTHCHECKRESPONSE'
].
_serialized_start
=
38
58
_globals
[
'_HEALTHCHECKRESPONSE'
].
_serialized_end
=
3
889
_globals
[
'_HEALTHCHECKRESPONSE'
].
_serialized_end
=
3
913
_globals
[
'_ABORTREQUEST'
].
_serialized_start
=
3
8
91
_globals
[
'_ABORTREQUEST'
].
_serialized_start
=
391
5
_globals
[
'_ABORTREQUEST'
].
_serialized_end
=
39
41
_globals
[
'_ABORTREQUEST'
].
_serialized_end
=
39
65
_globals
[
'_ABORTRESPONSE'
].
_serialized_start
=
39
43
_globals
[
'_ABORTRESPONSE'
].
_serialized_start
=
39
67
_globals
[
'_ABORTRESPONSE'
].
_serialized_end
=
3992
_globals
[
'_ABORTRESPONSE'
].
_serialized_end
=
4016
_globals
[
'_LOADLORAREQUEST'
].
_serialized_start
=
3994
_globals
[
'_LOADLORAREQUEST'
].
_serialized_start
=
4018
_globals
[
'_LOADLORAREQUEST'
].
_serialized_end
=
40
67
_globals
[
'_LOADLORAREQUEST'
].
_serialized_end
=
40
91
_globals
[
'_LOADLORARESPONSE'
].
_serialized_start
=
40
6
9
_globals
[
'_LOADLORARESPONSE'
].
_serialized_start
=
409
3
_globals
[
'_LOADLORARESPONSE'
].
_serialized_end
=
41
41
_globals
[
'_LOADLORARESPONSE'
].
_serialized_end
=
41
65
_globals
[
'_UNLOADLORAREQUEST'
].
_serialized_start
=
41
43
_globals
[
'_UNLOADLORAREQUEST'
].
_serialized_start
=
41
67
_globals
[
'_UNLOADLORAREQUEST'
].
_serialized_end
=
4
182
_globals
[
'_UNLOADLORAREQUEST'
].
_serialized_end
=
4
206
_globals
[
'_UNLOADLORARESPONSE'
].
_serialized_start
=
4
184
_globals
[
'_UNLOADLORARESPONSE'
].
_serialized_start
=
4
208
_globals
[
'_UNLOADLORARESPONSE'
].
_serialized_end
=
42
38
_globals
[
'_UNLOADLORARESPONSE'
].
_serialized_end
=
42
62
_globals
[
'_UPDATEWEIGHTSREQUEST'
].
_serialized_start
=
424
0
_globals
[
'_UPDATEWEIGHTSREQUEST'
].
_serialized_start
=
42
6
4
_globals
[
'_UPDATEWEIGHTSREQUEST'
].
_serialized_end
=
43
59
_globals
[
'_UPDATEWEIGHTSREQUEST'
].
_serialized_end
=
43
83
_globals
[
'_UPDATEWEIGHTSRESPONSE'
].
_serialized_start
=
43
61
_globals
[
'_UPDATEWEIGHTSRESPONSE'
].
_serialized_start
=
43
85
_globals
[
'_UPDATEWEIGHTSRESPONSE'
].
_serialized_end
=
44
18
_globals
[
'_UPDATEWEIGHTSRESPONSE'
].
_serialized_end
=
44
42
_globals
[
'_GETINTERNALSTATEREQUEST'
].
_serialized_start
=
44
20
_globals
[
'_GETINTERNALSTATEREQUEST'
].
_serialized_start
=
44
44
_globals
[
'_GETINTERNALSTATEREQUEST'
].
_serialized_end
=
44
65
_globals
[
'_GETINTERNALSTATEREQUEST'
].
_serialized_end
=
44
89
_globals
[
'_GETINTERNALSTATERESPONSE'
].
_serialized_start
=
44
67
_globals
[
'_GETINTERNALSTATERESPONSE'
].
_serialized_start
=
44
91
_globals
[
'_GETINTERNALSTATERESPONSE'
].
_serialized_end
=
45
33
_globals
[
'_GETINTERNALSTATERESPONSE'
].
_serialized_end
=
45
57
_globals
[
'_SETINTERNALSTATEREQUEST'
].
_serialized_start
=
45
3
5
_globals
[
'_SETINTERNALSTATEREQUEST'
].
_serialized_start
=
455
9
_globals
[
'_SETINTERNALSTATEREQUEST'
].
_serialized_end
=
46
00
_globals
[
'_SETINTERNALSTATEREQUEST'
].
_serialized_end
=
46
24
_globals
[
'_SETINTERNALSTATERESPONSE'
].
_serialized_start
=
46
0
2
_globals
[
'_SETINTERNALSTATERESPONSE'
].
_serialized_start
=
462
6
_globals
[
'_SETINTERNALSTATERESPONSE'
].
_serialized_end
=
466
2
_globals
[
'_SETINTERNALSTATERESPONSE'
].
_serialized_end
=
46
8
6
_globals
[
'_SGLANGSCHEDULER'
].
_serialized_start
=
46
65
_globals
[
'_SGLANGSCHEDULER'
].
_serialized_start
=
46
89
_globals
[
'_SGLANGSCHEDULER'
].
_serialized_end
=
50
4
7
_globals
[
'_SGLANGSCHEDULER'
].
_serialized_end
=
507
1
# @@protoc_insertion_point(module_scope)
# @@protoc_insertion_point(module_scope)
sgl-router/src/grpc_client/sglang_scheduler.rs
View file @
0c3db889
...
@@ -20,7 +20,7 @@ pub struct SglangSchedulerClient {
...
@@ -20,7 +20,7 @@ pub struct SglangSchedulerClient {
impl
SglangSchedulerClient
{
impl
SglangSchedulerClient
{
/// Create a new client and connect to the scheduler
/// Create a new client and connect to the scheduler
pub
async
fn
connect
(
endpoint
:
&
str
)
->
Result
<
Self
,
Box
<
dyn
std
::
error
::
Error
>>
{
pub
async
fn
connect
(
endpoint
:
&
str
)
->
Result
<
Self
,
Box
<
dyn
std
::
error
::
Error
+
Send
+
Sync
>>
{
debug!
(
"Connecting to SGLang scheduler at {}"
,
endpoint
);
debug!
(
"Connecting to SGLang scheduler at {}"
,
endpoint
);
// Convert grpc:// to http:// for tonic
// Convert grpc:// to http:// for tonic
...
@@ -41,10 +41,11 @@ impl SglangSchedulerClient {
...
@@ -41,10 +41,11 @@ impl SglangSchedulerClient {
}
}
/// Submit a generation request (returns streaming response)
/// Submit a generation request (returns streaming response)
pub
async
fn
generate
_stream
(
pub
async
fn
generate
(
&
mut
self
,
&
mut
self
,
req
:
proto
::
GenerateRequest
,
req
:
proto
::
GenerateRequest
,
)
->
Result
<
tonic
::
Streaming
<
proto
::
GenerateResponse
>
,
Box
<
dyn
std
::
error
::
Error
>>
{
)
->
Result
<
tonic
::
Streaming
<
proto
::
GenerateResponse
>
,
Box
<
dyn
std
::
error
::
Error
+
Send
+
Sync
>>
{
let
request
=
Request
::
new
(
req
);
let
request
=
Request
::
new
(
req
);
let
response
=
self
.client
.generate
(
request
)
.await
?
;
let
response
=
self
.client
.generate
(
request
)
.await
?
;
Ok
(
response
.into_inner
())
Ok
(
response
.into_inner
())
...
@@ -53,7 +54,7 @@ impl SglangSchedulerClient {
...
@@ -53,7 +54,7 @@ impl SglangSchedulerClient {
/// Perform health check
/// Perform health check
pub
async
fn
health_check
(
pub
async
fn
health_check
(
&
mut
self
,
&
mut
self
,
)
->
Result
<
proto
::
HealthCheckResponse
,
Box
<
dyn
std
::
error
::
Error
>>
{
)
->
Result
<
proto
::
HealthCheckResponse
,
Box
<
dyn
std
::
error
::
Error
+
Send
+
Sync
>>
{
debug!
(
"Sending health check request"
);
debug!
(
"Sending health check request"
);
let
request
=
Request
::
new
(
proto
::
HealthCheckRequest
{
let
request
=
Request
::
new
(
proto
::
HealthCheckRequest
{
tokenized
:
Some
(
proto
::
TokenizedInput
{
tokenized
:
Some
(
proto
::
TokenizedInput
{
...
@@ -72,7 +73,7 @@ impl SglangSchedulerClient {
...
@@ -72,7 +73,7 @@ impl SglangSchedulerClient {
&
mut
self
,
&
mut
self
,
request_id
:
String
,
request_id
:
String
,
reason
:
String
,
reason
:
String
,
)
->
Result
<
(),
Box
<
dyn
std
::
error
::
Error
>>
{
)
->
Result
<
(),
Box
<
dyn
std
::
error
::
Error
+
Send
+
Sync
>>
{
let
request
=
Request
::
new
(
proto
::
AbortRequest
{
request_id
,
reason
});
let
request
=
Request
::
new
(
proto
::
AbortRequest
{
request_id
,
reason
});
self
.client
.abort
(
request
)
.await
?
;
self
.client
.abort
(
request
)
.await
?
;
...
@@ -85,7 +86,7 @@ impl SglangSchedulerClient {
...
@@ -85,7 +86,7 @@ impl SglangSchedulerClient {
request_id
:
String
,
request_id
:
String
,
body
:
&
ChatCompletionRequest
,
body
:
&
ChatCompletionRequest
,
processed_text
:
String
,
processed_text
:
String
,
token_ids
:
Vec
<
i
32
>
,
token_ids
:
Vec
<
u
32
>
,
multimodal_inputs
:
Option
<
proto
::
MultimodalInputs
>
,
multimodal_inputs
:
Option
<
proto
::
MultimodalInputs
>
,
tool_call_constraint
:
Option
<
(
String
,
String
)
>
,
// (constraint_type, constraint_value)
tool_call_constraint
:
Option
<
(
String
,
String
)
>
,
// (constraint_type, constraint_value)
)
->
Result
<
proto
::
GenerateRequest
,
String
>
{
)
->
Result
<
proto
::
GenerateRequest
,
String
>
{
...
@@ -153,6 +154,8 @@ impl SglangSchedulerClient {
...
@@ -153,6 +154,8 @@ impl SglangSchedulerClient {
stop
:
stop_sequences
,
stop
:
stop_sequences
,
stop_token_ids
:
request
.stop_token_ids
.clone
()
.unwrap_or_default
(),
stop_token_ids
:
request
.stop_token_ids
.clone
()
.unwrap_or_default
(),
skip_special_tokens
,
skip_special_tokens
,
ignore_eos
:
request
.ignore_eos
,
no_stop_trim
:
request
.no_stop_trim
,
n
:
request
.n
.unwrap_or
(
1
)
as
i32
,
n
:
request
.n
.unwrap_or
(
1
)
as
i32
,
constraint
:
self
.build_constraint
(
request
,
tool_call_constraint
)
?
,
constraint
:
self
.build_constraint
(
request
,
tool_call_constraint
)
?
,
..
Default
::
default
()
..
Default
::
default
()
...
...
sgl-router/src/proto/sglang_scheduler.proto
View file @
0c3db889
...
@@ -38,7 +38,7 @@ message SamplingParams {
...
@@ -38,7 +38,7 @@ message SamplingParams {
optional
int32
max_new_tokens
=
8
;
optional
int32
max_new_tokens
=
8
;
repeated
string
stop
=
9
;
repeated
string
stop
=
9
;
repeated
int32
stop_token_ids
=
10
;
repeated
u
int32
stop_token_ids
=
10
;
bool
skip_special_tokens
=
11
;
bool
skip_special_tokens
=
11
;
bool
spaces_between_special_tokens
=
12
;
bool
spaces_between_special_tokens
=
12
;
...
@@ -98,7 +98,7 @@ message GenerateRequest {
...
@@ -98,7 +98,7 @@ message GenerateRequest {
bool
return_logprob
=
5
;
bool
return_logprob
=
5
;
int32
logprob_start_len
=
6
;
int32
logprob_start_len
=
6
;
int32
top_logprobs_num
=
7
;
int32
top_logprobs_num
=
7
;
repeated
int32
token_ids_logprob
=
8
;
repeated
u
int32
token_ids_logprob
=
8
;
bool
return_hidden_states
=
9
;
bool
return_hidden_states
=
9
;
// For disaggregated serving
// For disaggregated serving
...
@@ -129,7 +129,7 @@ message GenerateRequest {
...
@@ -129,7 +129,7 @@ message GenerateRequest {
message
TokenizedInput
{
message
TokenizedInput
{
string
original_text
=
1
;
// For reference
string
original_text
=
1
;
// For reference
repeated
int32
input_ids
=
2
;
repeated
u
int32
input_ids
=
2
;
}
}
message
MultimodalInputs
{
message
MultimodalInputs
{
...
@@ -167,7 +167,7 @@ message GenerateResponse {
...
@@ -167,7 +167,7 @@ message GenerateResponse {
message
GenerateStreamChunk
{
message
GenerateStreamChunk
{
// Generated tokens (incremental chunk)
// Generated tokens (incremental chunk)
repeated
int32
token_ids
=
1
;
repeated
u
int32
token_ids
=
1
;
// Cumulative counts
// Cumulative counts
int32
prompt_tokens
=
2
;
int32
prompt_tokens
=
2
;
...
@@ -183,7 +183,7 @@ message GenerateStreamChunk {
...
@@ -183,7 +183,7 @@ message GenerateStreamChunk {
message
GenerateComplete
{
message
GenerateComplete
{
// Final output
// Final output
repeated
int32
output_ids
=
1
;
repeated
u
int32
output_ids
=
1
;
// Finish reason
// Finish reason
enum
FinishReason
{
enum
FinishReason
{
...
...
sgl-router/src/protocols/spec.rs
View file @
0c3db889
...
@@ -313,7 +313,7 @@ pub struct ChatCompletionRequest {
...
@@ -313,7 +313,7 @@ pub struct ChatCompletionRequest {
/// Specific token IDs to use as stop conditions
/// Specific token IDs to use as stop conditions
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
stop_token_ids
:
Option
<
Vec
<
i
32
>>
,
pub
stop_token_ids
:
Option
<
Vec
<
u
32
>>
,
/// Skip trimming stop tokens from output
/// Skip trimming stop tokens from output
#[serde(default)]
#[serde(default)]
...
@@ -564,7 +564,7 @@ pub struct CompletionRequest {
...
@@ -564,7 +564,7 @@ pub struct CompletionRequest {
/// Specific token IDs to use as stop conditions
/// Specific token IDs to use as stop conditions
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
stop_token_ids
:
Option
<
Vec
<
i
32
>>
,
pub
stop_token_ids
:
Option
<
Vec
<
u
32
>>
,
/// Skip trimming stop tokens from output
/// Skip trimming stop tokens from output
#[serde(default)]
#[serde(default)]
...
@@ -1864,7 +1864,7 @@ pub struct SamplingParams {
...
@@ -1864,7 +1864,7 @@ pub struct SamplingParams {
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
min_tokens
:
Option
<
u32
>
,
pub
min_tokens
:
Option
<
u32
>
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
stop_token_ids
:
Option
<
Vec
<
i
32
>>
,
pub
stop_token_ids
:
Option
<
Vec
<
u
32
>>
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
no_stop_trim
:
Option
<
bool
>
,
pub
no_stop_trim
:
Option
<
bool
>
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
...
...
sgl-router/src/routers/grpc/router.rs
View file @
0c3db889
...
@@ -17,19 +17,20 @@ use crate::grpc_client::{proto, SglangSchedulerClient};
...
@@ -17,19 +17,20 @@ use crate::grpc_client::{proto, SglangSchedulerClient};
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
policies
::
PolicyRegistry
;
use
crate
::
policies
::
PolicyRegistry
;
use
crate
::
protocols
::
spec
::
ChatMessage
;
use
crate
::
protocols
::
spec
::
ChatMessage
;
use
crate
::
protocols
::
spec
::{
ChatCompletionRequest
,
StringOrArray
};
use
crate
::
protocols
::
spec
::{
use
crate
::
protocols
::
spec
::{
CompletionRequest
,
EmbeddingRequest
,
GenerateRequest
,
RerankRequest
,
ResponsesGetParams
,
ChatCompletionRequest
,
CompletionRequest
,
EmbeddingRequest
,
GenerateRequest
,
RerankRequest
,
Responses
Request
,
Tool
,
ToolChoice
,
Responses
GetParams
,
ResponsesRequest
,
StringOrArray
,
Tool
,
ToolChoice
,
};
};
use
crate
::
reasoning_parser
::
ParserFactory
;
use
crate
::
reasoning_parser
::
ParserFactory
;
use
crate
::
routers
::
RouterTrait
;
use
crate
::
routers
::
RouterTrait
;
use
crate
::
server
::
AppContext
;
use
crate
::
server
::
AppContext
;
use
crate
::
tokenizer
::
chat_template
::{
ChatTemplateContentFormat
,
ChatTemplateParams
};
use
crate
::
tokenizer
::
chat_template
::{
ChatTemplateContentFormat
,
ChatTemplateParams
};
use
crate
::
tokenizer
::
stop
::{
SequenceDecoderOutput
,
StopSequenceDecoderBuilder
};
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
use
crate
::
tokenizer
::
HuggingFaceTokenizer
;
use
crate
::
tokenizer
::
HuggingFaceTokenizer
;
use
crate
::
tool_parser
::
ParserRegistry
;
use
crate
::
tool_parser
::
ParserRegistry
;
use
serde_json
::
Value
;
use
serde_json
::
Value
;
use
tokio_stream
::
StreamExt
;
use
uuid
::
Uuid
;
use
uuid
::
Uuid
;
// Data structures for processing
// Data structures for processing
...
@@ -182,7 +183,7 @@ impl GrpcRouter {
...
@@ -182,7 +183,7 @@ impl GrpcRouter {
request_id
,
request_id
,
body
,
body
,
processed_messages
.text
.clone
(),
processed_messages
.text
.clone
(),
token_ids
.into_iter
()
.map
(|
id
|
id
as
i32
)
.collect
()
,
token_ids
,
processed_messages
.multimodal_inputs
,
processed_messages
.multimodal_inputs
,
tool_call_constraint
,
// Pass the full tuple (type, value)
tool_call_constraint
,
// Pass the full tuple (type, value)
)
{
)
{
...
@@ -479,28 +480,225 @@ impl GrpcRouter {
...
@@ -479,28 +480,225 @@ impl GrpcRouter {
None
None
}
}
/// Placeholder for streaming handler (to be implemented in Phase 2)
/// Build a `StopSequenceDecoder` configured from the chat completion request.
///
/// Stop strings and stop token IDs from the request are registered as
/// *visible* stops when `no_stop_trim` is set (the matched stop text stays in
/// the output) and as hidden stops otherwise (the stop text is trimmed).
fn create_stop_decoder(
    &self,
    original_request: &ChatCompletionRequest,
) -> crate::tokenizer::stop::StopSequenceDecoder {
    let keep_stop_visible = original_request.no_stop_trim;

    // Normalize the request's `stop` field into a flat list of strings.
    let sequences: Vec<String> = match &original_request.stop {
        Some(StringOrArray::String(single)) => vec![single.clone()],
        Some(StringOrArray::Array(many)) => many.clone(),
        None => Vec::new(),
    };

    let mut decoder_builder = StopSequenceDecoderBuilder::new(self.tokenizer.clone())
        .skip_special_tokens(original_request.skip_special_tokens);

    // Register textual stop sequences.
    for sequence in sequences {
        decoder_builder = match keep_stop_visible {
            true => decoder_builder.visible_stop_sequence(sequence),
            false => decoder_builder.stop_sequence(sequence),
        };
    }

    // Register numeric stop token IDs, when the request provides any.
    if let Some(ids) = &original_request.stop_token_ids {
        for &tid in ids {
            decoder_builder = match keep_stop_visible {
                true => decoder_builder.visible_stop_token(tid),
                false => decoder_builder.stop_token(tid),
            };
        }
    }

    decoder_builder.build()
}
/// Process a chunk of tokens through the stop decoder
fn
process_chunk_tokens
(
stop_decoder
:
&
mut
crate
::
tokenizer
::
stop
::
StopSequenceDecoder
,
token_ids
:
&
[
u32
],
)
->
(
String
,
bool
)
{
let
mut
chunk_text
=
String
::
new
();
for
&
token_id
in
token_ids
{
match
stop_decoder
.process_token
(
token_id
)
.unwrap_or_else
(|
e
|
{
debug!
(
"Error processing token {}: {}. Treating as Held."
,
token_id
,
e
);
SequenceDecoderOutput
::
Held
})
{
SequenceDecoderOutput
::
Text
(
text
)
=>
{
chunk_text
.push_str
(
&
text
);
}
SequenceDecoderOutput
::
StoppedWithText
(
text
)
=>
{
chunk_text
.push_str
(
&
text
);
return
(
chunk_text
,
true
);
// Return text and signal to stop
}
SequenceDecoderOutput
::
Stopped
=>
{
return
(
chunk_text
,
true
);
// Return text and signal to stop
}
SequenceDecoderOutput
::
Held
=>
{
// Text held for potential stop sequence match
}
}
}
(
chunk_text
,
false
)
// Return text and continue processing
}
/// Submit request and handle streaming response for chat completions route
///
/// NOTE(review): despite the name, this currently drains the whole gRPC
/// stream and returns the fully decoded text in a single plain-text response;
/// real SSE streaming is still a TODO (see bottom of function).
async fn handle_streaming_chat(
    &self,
    mut client: SglangSchedulerClient,
    request: proto::GenerateRequest,
    original_request: &ChatCompletionRequest,
) -> Response {
    // Decoder that trims/keeps stop sequences per the original request.
    let mut stop_decoder = self.create_stop_decoder(original_request);

    // Process streaming tokens
    let mut grpc_stream = match client.generate(request).await {
        Ok(stream) => stream,
        Err(e) => {
            error!("Failed to start generation: {}", e);
            return (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Generation failed: {}", e),
            )
                .into_response();
        }
    };

    let mut decoded_text = String::new();

    // Consume the stream until Complete/Error/stop-condition; transport
    // errors terminate the loop but still return whatever was decoded.
    while let Some(response) = grpc_stream.next().await {
        let gen_response = match response {
            Ok(resp) => resp,
            Err(e) => {
                error!("Stream error: {}", e);
                break;
            }
        };

        match gen_response.response {
            Some(proto::generate_response::Response::Chunk(chunk)) => {
                // Process tokens and check if we should stop
                let (chunk_text, should_stop) =
                    Self::process_chunk_tokens(&mut stop_decoder, &chunk.token_ids);
                decoded_text.push_str(&chunk_text);
                if should_stop {
                    break;
                }
                continue;
            }
            Some(proto::generate_response::Response::Complete(_complete)) => {
                // Flush any remaining text
                if let SequenceDecoderOutput::Text(text) = stop_decoder.flush() {
                    if !text.is_empty() {
                        decoded_text.push_str(&text);
                        debug!("Flushed text: {}", text);
                    }
                }
                break;
            }
            Some(proto::generate_response::Response::Error(error)) => {
                error!("Generation error: {}", error.message);
                break;
            }
            // Empty oneof — ignore and keep reading.
            None => continue,
        }
    }

    // TODO: Replace with proper SSE streaming response
    // For now, return the complete decoded text
    (StatusCode::OK, format!("Decoded text: {}", decoded_text)).into_response()
}
/// Submit request and handle non-streaming response for chat completions route
///
/// Expects exactly one `Complete` message from the scheduler stream, decodes
/// its output token IDs through the stop decoder, and returns the final text.
/// NOTE(review): still returns plain text; OpenAI-compatible JSON is a TODO.
async fn handle_non_streaming_chat(
    &self,
    mut client: SglangSchedulerClient,
    request: proto::GenerateRequest,
    original_request: &ChatCompletionRequest,
) -> Response {
    let mut stop_decoder = self.create_stop_decoder(original_request);

    // Small helpers to log + return a uniform 500
    let fail_str = |msg: &'static str| -> Response {
        error!("{}", msg);
        (StatusCode::INTERNAL_SERVER_ERROR, msg).into_response()
    };
    let fail_fmt = |prefix: &str, e: &dyn std::fmt::Display| -> Response {
        error!("{}{}", prefix, e);
        (
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("{}{}", prefix, e),
        )
            .into_response()
    };

    // Start generation
    let mut stream = match client.generate(request).await {
        Ok(s) => s,
        Err(e) => return fail_fmt("Failed to start generation: ", &e),
    };

    // Get the single Complete response
    let gen_response = match stream.next().await {
        Some(Ok(r)) => r,
        Some(Err(e)) => return fail_fmt("Failed to get GenerateResponse: ", &e),
        None => return fail_str("No response from server"),
    };

    // Extract the expected variant early
    let complete = match gen_response.response {
        Some(proto::generate_response::Response::Complete(c)) => c,
        Some(proto::generate_response::Response::Error(err)) => {
            error!("Generation failed: {}", err.message);
            return (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Generation failed: {}", err.message),
            )
                .into_response();
        }
        Some(proto::generate_response::Response::Chunk(_)) => {
            return fail_str("Unexpected chunk response for non-streaming request")
        }
        None => return fail_str("Empty response from server"),
    };

    // Decode tokens
    let outputs = match stop_decoder.process_tokens(&complete.output_ids) {
        Ok(o) => o,
        Err(e) => return fail_fmt("Failed to process tokens: ", &e),
    };

    // Accumulate text with early breaks
    let mut final_text = String::new();
    for output in outputs {
        match output {
            SequenceDecoderOutput::Text(t) => final_text.push_str(&t),
            SequenceDecoderOutput::StoppedWithText(t) => {
                // Stop matched; keep its visible text, discard the rest.
                final_text.push_str(&t);
                break;
            }
            SequenceDecoderOutput::Stopped => break,
            SequenceDecoderOutput::Held => {}
        }
    }

    // Flush remaining text
    if let SequenceDecoderOutput::Text(t) = stop_decoder.flush() {
        final_text.push_str(&t);
    }

    // TODO: Create proper OpenAI-compatible response
    (StatusCode::OK, format!("Final text: {}", final_text)).into_response()
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment