sglang · Commit 7dcd689b (unverified)

[router][refactor] Clean up protobuf fields (#10923)

Authored Sep 25, 2025 by Chang Su; committed via GitHub on Sep 25, 2025.
Parent: f7bab41a
Showing 5 changed files with 102 additions and 109 deletions (+102, -109):

python/sglang/srt/entrypoints/grpc_server.py (+8, -7)
python/sglang/srt/grpc/sglang_scheduler.proto (+14, -16)
python/sglang/srt/grpc/sglang_scheduler_pb2.py (+54, -54)
python/sglang/srt/grpc/sglang_scheduler_pb2.pyi (+12, -16)
sgl-router/src/proto/sglang_scheduler.proto (+14, -16)
python/sglang/srt/entrypoints/grpc_server.py

@@ -266,7 +266,6 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
                     prompt_tokens=result.get("prompt_tokens", 0),
                     cached_tokens=0,
                     embedding_dim=len(result["embedding"]),
-                    generation_time=time.time() - self.start_time,
                 ),
             )
...
@@ -477,16 +476,14 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
         self, request_id: str, output: Dict
     ) -> sglang_scheduler_pb2.GenerateResponse:
         """Create a streaming chunk response."""
-        meta_info = output.get("meta_info", {})
         return sglang_scheduler_pb2.GenerateResponse(
             request_id=request_id,
             chunk=sglang_scheduler_pb2.GenerateStreamChunk(
                 token_id=output["token_ids"][-1] if output.get("token_ids") else 0,
-                text=output.get("text", ""),
-                prompt_tokens=meta_info.get("prompt_tokens", 0),
-                completion_tokens=meta_info.get("completion_tokens", 0),
+                prompt_tokens=0,
+                completion_tokens=len(output.get("token_ids", [])),
                 cached_tokens=0,
-                generation_time=time.time() - self.start_time,
-                queue_time=0.0,
             ),
         )
...
@@ -507,8 +504,12 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
             request_id=request_id,
             complete=sglang_scheduler_pb2.GenerateComplete(
                 output_ids=output.get("token_ids", []),
-                output_text=output.get("text", ""),
                 finish_reason=finish_reason,
+                prompt_tokens=meta_info.get("prompt_tokens", 0),
+                completion_tokens=meta_info.get("completion_tokens", len(output.get("token_ids", []))),
+                cached_tokens=meta_info.get("cached_tokens", 0),
             ),
         )
...
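With text, generation_time, and queue_time gone from GenerateStreamChunk and the usage counts moved onto GenerateComplete, a streaming caller now reads per-token ids from chunks and takes the final counts from the complete message. A minimal consumer sketch, assuming the regenerated stubs and a pre-built GenerateRequest (client code like this is not part of the commit):

from sglang.srt.grpc import sglang_scheduler_pb2, sglang_scheduler_pb2_grpc

def consume(stub: sglang_scheduler_pb2_grpc.SglangSchedulerStub,
            request: sglang_scheduler_pb2.GenerateRequest) -> None:
    for resp in stub.Generate(request):  # Generate is a server-streaming RPC
        kind = resp.WhichOneof("response")
        if kind == "chunk":
            print("token", resp.chunk.token_id)  # chunks no longer carry decoded text
        elif kind == "complete":
            c = resp.complete
            # usage counts now live on GenerateComplete rather than on each chunk
            print(c.prompt_tokens, c.completion_tokens, c.cached_tokens, c.finish_reason)
        elif kind == "error":
            raise RuntimeError(resp.error.message)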
python/sglang/srt/grpc/sglang_scheduler.proto

@@ -165,28 +165,22 @@ message GenerateResponse {
 message GenerateStreamChunk {
   // Generated token
   int32 token_id = 1;
-  string text = 2;
   // Cumulative counts
-  int32 prompt_tokens = 3;
+  int32 prompt_tokens = 2;
-  int32 completion_tokens = 4;
+  int32 completion_tokens = 3;
-  int32 cached_tokens = 5;
+  int32 cached_tokens = 4;
   // Logprobs (if requested)
-  LogProbs logprobs = 6;
+  LogProbs logprobs = 5;
   // Hidden states (if requested)
-  repeated float hidden_states = 7;
+  repeated float hidden_states = 6;
-  // Metadata
-  float generation_time = 8;  // Time to generate this token
-  int32 queue_time = 9;       // Time spent in queue
 }

 message GenerateComplete {
   // Final output
   repeated int32 output_ids = 1;
-  string output_text = 2;
   // Finish reason
   enum FinishReason {
...
@@ -201,13 +195,18 @@ message GenerateComplete {
     // The request was aborted by the user or system.
     ABORT = 4;
   }
-  FinishReason finish_reason = 3;
+  FinishReason finish_reason = 2;
+
+  // Token usage counts
+  int32 prompt_tokens = 3;
+  int32 completion_tokens = 4;
+  int32 cached_tokens = 5;
   // All logprobs if requested
-  repeated LogProbs all_logprobs = 11;
+  repeated LogProbs all_logprobs = 6;
   // All hidden states if requested
-  repeated HiddenStates all_hidden_states = 12;
+  repeated HiddenStates all_hidden_states = 7;
 }

 message GenerateError {
...
@@ -285,10 +284,9 @@ message EmbedComplete {
   // Additional metadata
   int32 embedding_dim = 4;
-  float generation_time = 5;
   // For batch embeddings
-  repeated Embedding batch_embeddings = 6;
+  repeated Embedding batch_embeddings = 5;
 }

 message Embedding {
...
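Because protobuf identifies fields by number on the wire, the renumbering above is only safe when every producer and consumer is rebuilt from the same .proto, which is what the regenerated files in this commit do. A quick round-trip with the new layout, assuming the regenerated Python bindings are importable:

from sglang.srt.grpc import sglang_scheduler_pb2 as pb

msg = pb.GenerateComplete(
    output_ids=[11, 42],
    finish_reason=pb.GenerateComplete.STOP,  # finish_reason is now field 2
    prompt_tokens=8,       # new field 3
    completion_tokens=2,   # new field 4
    cached_tokens=0,       # new field 5
)
assert pb.GenerateComplete.FromString(msg.SerializeToString()).completion_tokens == 2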
python/sglang/srt/grpc/sglang_scheduler_pb2.py

@@ -29,7 +29,7 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__
 from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x16sglang_scheduler.proto\x12\x15sglang.grpc.scheduler ... proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x16sglang_scheduler.proto\x12\x15sglang.grpc.scheduler ... proto3')
 [protoc-generated serialized FileDescriptor bytes, elided here; the regenerated string differs only in the re-encoded GenerateStreamChunk, GenerateComplete, and EmbedComplete definitions matching the updated .proto]
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
...
@@ -53,57 +53,57 @@ if not _descriptor._USE_C_DESCRIPTORS:
 _globals['_GENERATERESPONSE']._serialized_start=1818
 _globals['_GENERATERESPONSE']._serialized_end=2045
 _globals['_GENERATESTREAMCHUNK']._serialized_start=2048
-_globals['_GENERATESTREAMCHUNK']._serialized_end=2293
+_globals['_GENERATESTREAMCHUNK']._serialized_end=2234
-_globals['_GENERATECOMPLETE']._serialized_start=2296
+_globals['_GENERATECOMPLETE']._serialized_start=2237
-_globals['_GENERATECOMPLETE']._serialized_end=2629
+_globals['_GENERATECOMPLETE']._serialized_end=2622
-_globals['_GENERATECOMPLETE_FINISHREASON']._serialized_start=2553
+_globals['_GENERATECOMPLETE_FINISHREASON']._serialized_start=2546
-_globals['_GENERATECOMPLETE_FINISHREASON']._serialized_end=2629
+_globals['_GENERATECOMPLETE_FINISHREASON']._serialized_end=2622
-_globals['_GENERATEERROR']._serialized_start=2631
+_globals['_GENERATEERROR']._serialized_start=2624
-_globals['_GENERATEERROR']._serialized_end=2706
+_globals['_GENERATEERROR']._serialized_end=2699
-_globals['_LOGPROBS']._serialized_start=2709
+_globals['_LOGPROBS']._serialized_start=2702
-_globals['_LOGPROBS']._serialized_end=2841
+_globals['_LOGPROBS']._serialized_end=2834
-_globals['_TOPLOGPROBS']._serialized_start=2843
+_globals['_TOPLOGPROBS']._serialized_start=2836
-_globals['_TOPLOGPROBS']._serialized_end=2912
+_globals['_TOPLOGPROBS']._serialized_end=2905
-_globals['_HIDDENSTATES']._serialized_start=2914
+_globals['_HIDDENSTATES']._serialized_start=2907
-_globals['_HIDDENSTATES']._serialized_end=2977
+_globals['_HIDDENSTATES']._serialized_end=2970
-_globals['_EMBEDREQUEST']._serialized_start=2980
+_globals['_EMBEDREQUEST']._serialized_start=2973
-_globals['_EMBEDREQUEST']._serialized_end=3310
+_globals['_EMBEDREQUEST']._serialized_end=3303
-_globals['_EMBEDRESPONSE']._serialized_start=3313
+_globals['_EMBEDRESPONSE']._serialized_start=3306
-_globals['_EMBEDRESPONSE']._serialized_end=3470
+_globals['_EMBEDRESPONSE']._serialized_end=3463
-_globals['_EMBEDCOMPLETE']._serialized_start=3473
+_globals['_EMBEDCOMPLETE']._serialized_start=3466
-_globals['_EMBEDCOMPLETE']._serialized_end=3661
+_globals['_EMBEDCOMPLETE']._serialized_end=3629
-_globals['_EMBEDDING']._serialized_start=3663
+_globals['_EMBEDDING']._serialized_start=3631
-_globals['_EMBEDDING']._serialized_end=3705
+_globals['_EMBEDDING']._serialized_end=3673
-_globals['_EMBEDERROR']._serialized_start=3707
+_globals['_EMBEDERROR']._serialized_start=3675
-_globals['_EMBEDERROR']._serialized_end=3767
+_globals['_EMBEDERROR']._serialized_end=3735
-_globals['_HEALTHCHECKREQUEST']._serialized_start=3769
+_globals['_HEALTHCHECKREQUEST']._serialized_start=3737
-_globals['_HEALTHCHECKREQUEST']._serialized_end=3847
+_globals['_HEALTHCHECKREQUEST']._serialized_end=3815
-_globals['_HEALTHCHECKRESPONSE']._serialized_start=3849
+_globals['_HEALTHCHECKRESPONSE']._serialized_start=3817
-_globals['_HEALTHCHECKRESPONSE']._serialized_end=3904
+_globals['_HEALTHCHECKRESPONSE']._serialized_end=3872
-_globals['_ABORTREQUEST']._serialized_start=3906
+_globals['_ABORTREQUEST']._serialized_start=3874
-_globals['_ABORTREQUEST']._serialized_end=3956
+_globals['_ABORTREQUEST']._serialized_end=3924
-_globals['_ABORTRESPONSE']._serialized_start=3958
+_globals['_ABORTRESPONSE']._serialized_start=3926
-_globals['_ABORTRESPONSE']._serialized_end=4007
+_globals['_ABORTRESPONSE']._serialized_end=3975
-_globals['_LOADLORAREQUEST']._serialized_start=4009
+_globals['_LOADLORAREQUEST']._serialized_start=3977
-_globals['_LOADLORAREQUEST']._serialized_end=4082
+_globals['_LOADLORAREQUEST']._serialized_end=4050
-_globals['_LOADLORARESPONSE']._serialized_start=4084
+_globals['_LOADLORARESPONSE']._serialized_start=4052
-_globals['_LOADLORARESPONSE']._serialized_end=4156
+_globals['_LOADLORARESPONSE']._serialized_end=4124
-_globals['_UNLOADLORAREQUEST']._serialized_start=4158
+_globals['_UNLOADLORAREQUEST']._serialized_start=4126
-_globals['_UNLOADLORAREQUEST']._serialized_end=4197
+_globals['_UNLOADLORAREQUEST']._serialized_end=4165
-_globals['_UNLOADLORARESPONSE']._serialized_start=4199
+_globals['_UNLOADLORARESPONSE']._serialized_start=4167
-_globals['_UNLOADLORARESPONSE']._serialized_end=4253
+_globals['_UNLOADLORARESPONSE']._serialized_end=4221
-_globals['_UPDATEWEIGHTSREQUEST']._serialized_start=4255
+_globals['_UPDATEWEIGHTSREQUEST']._serialized_start=4223
-_globals['_UPDATEWEIGHTSREQUEST']._serialized_end=4374
+_globals['_UPDATEWEIGHTSREQUEST']._serialized_end=4342
-_globals['_UPDATEWEIGHTSRESPONSE']._serialized_start=4376
+_globals['_UPDATEWEIGHTSRESPONSE']._serialized_start=4344
-_globals['_UPDATEWEIGHTSRESPONSE']._serialized_end=4433
+_globals['_UPDATEWEIGHTSRESPONSE']._serialized_end=4401
-_globals['_GETINTERNALSTATEREQUEST']._serialized_start=4435
+_globals['_GETINTERNALSTATEREQUEST']._serialized_start=4403
-_globals['_GETINTERNALSTATEREQUEST']._serialized_end=4480
+_globals['_GETINTERNALSTATEREQUEST']._serialized_end=4448
-_globals['_GETINTERNALSTATERESPONSE']._serialized_start=4482
+_globals['_GETINTERNALSTATERESPONSE']._serialized_start=4450
-_globals['_GETINTERNALSTATERESPONSE']._serialized_end=4548
+_globals['_GETINTERNALSTATERESPONSE']._serialized_end=4516
-_globals['_SETINTERNALSTATEREQUEST']._serialized_start=4550
+_globals['_SETINTERNALSTATEREQUEST']._serialized_start=4518
-_globals['_SETINTERNALSTATEREQUEST']._serialized_end=4615
+_globals['_SETINTERNALSTATEREQUEST']._serialized_end=4583
-_globals['_SETINTERNALSTATERESPONSE']._serialized_start=4617
+_globals['_SETINTERNALSTATERESPONSE']._serialized_start=4585
-_globals['_SETINTERNALSTATERESPONSE']._serialized_end=4677
+_globals['_SETINTERNALSTATERESPONSE']._serialized_end=4645
-_globals['_SGLANGSCHEDULER']._serialized_start=4680
+_globals['_SGLANGSCHEDULER']._serialized_start=4648
-_globals['_SGLANGSCHEDULER']._serialized_end=5062
+_globals['_SGLANGSCHEDULER']._serialized_end=5030
 # @@protoc_insertion_point(module_scope)
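sglang_scheduler_pb2.py is protoc output, so the changes above are mechanical; one way to sanity-check that the regenerated module matches the renumbered schema is to inspect the message descriptors (an illustrative check, not part of the commit):

from sglang.srt.grpc import sglang_scheduler_pb2 as pb

chunk_fields = pb.GenerateStreamChunk.DESCRIPTOR.fields_by_name
assert chunk_fields["prompt_tokens"].number == 2
assert "text" not in chunk_fields          # removed in this commit

complete_fields = pb.GenerateComplete.DESCRIPTOR.fields_by_name
assert complete_fields["finish_reason"].number == 2
assert complete_fields["cached_tokens"].number == 5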
python/sglang/srt/grpc/sglang_scheduler_pb2.pyi

@@ -161,29 +161,23 @@ class GenerateResponse(_message.Message):
     def __init__(self, request_id: _Optional[str] = ..., chunk: _Optional[_Union[GenerateStreamChunk, _Mapping]] = ..., complete: _Optional[_Union[GenerateComplete, _Mapping]] = ..., error: _Optional[_Union[GenerateError, _Mapping]] = ...) -> None: ...

 class GenerateStreamChunk(_message.Message):
-    __slots__ = ("token_id", "text", "prompt_tokens", "completion_tokens", "cached_tokens", "logprobs", "hidden_states", "generation_time", "queue_time")
+    __slots__ = ("token_id", "prompt_tokens", "completion_tokens", "cached_tokens", "logprobs", "hidden_states")
     TOKEN_ID_FIELD_NUMBER: _ClassVar[int]
-    TEXT_FIELD_NUMBER: _ClassVar[int]
     PROMPT_TOKENS_FIELD_NUMBER: _ClassVar[int]
     COMPLETION_TOKENS_FIELD_NUMBER: _ClassVar[int]
     CACHED_TOKENS_FIELD_NUMBER: _ClassVar[int]
     LOGPROBS_FIELD_NUMBER: _ClassVar[int]
     HIDDEN_STATES_FIELD_NUMBER: _ClassVar[int]
-    GENERATION_TIME_FIELD_NUMBER: _ClassVar[int]
-    QUEUE_TIME_FIELD_NUMBER: _ClassVar[int]
     token_id: int
-    text: str
     prompt_tokens: int
     completion_tokens: int
     cached_tokens: int
     logprobs: LogProbs
     hidden_states: _containers.RepeatedScalarFieldContainer[float]
-    generation_time: float
-    queue_time: int
-    def __init__(self, token_id: _Optional[int] = ..., text: _Optional[str] = ..., prompt_tokens: _Optional[int] = ..., completion_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., logprobs: _Optional[_Union[LogProbs, _Mapping]] = ..., hidden_states: _Optional[_Iterable[float]] = ..., generation_time: _Optional[float] = ..., queue_time: _Optional[int] = ...) -> None: ...
+    def __init__(self, token_id: _Optional[int] = ..., prompt_tokens: _Optional[int] = ..., completion_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., logprobs: _Optional[_Union[LogProbs, _Mapping]] = ..., hidden_states: _Optional[_Iterable[float]] = ...) -> None: ...

 class GenerateComplete(_message.Message):
-    __slots__ = ("output_ids", "output_text", "finish_reason", "all_logprobs", "all_hidden_states")
+    __slots__ = ("output_ids", "finish_reason", "prompt_tokens", "completion_tokens", "cached_tokens", "all_logprobs", "all_hidden_states")
     class FinishReason(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
         __slots__ = ()
         STOP: _ClassVar[GenerateComplete.FinishReason]
...
@@ -197,16 +191,20 @@ class GenerateComplete(_message.Message):
     STOP_STR: GenerateComplete.FinishReason
     ABORT: GenerateComplete.FinishReason
     OUTPUT_IDS_FIELD_NUMBER: _ClassVar[int]
-    OUTPUT_TEXT_FIELD_NUMBER: _ClassVar[int]
     FINISH_REASON_FIELD_NUMBER: _ClassVar[int]
+    PROMPT_TOKENS_FIELD_NUMBER: _ClassVar[int]
+    COMPLETION_TOKENS_FIELD_NUMBER: _ClassVar[int]
+    CACHED_TOKENS_FIELD_NUMBER: _ClassVar[int]
     ALL_LOGPROBS_FIELD_NUMBER: _ClassVar[int]
     ALL_HIDDEN_STATES_FIELD_NUMBER: _ClassVar[int]
     output_ids: _containers.RepeatedScalarFieldContainer[int]
-    output_text: str
     finish_reason: GenerateComplete.FinishReason
+    prompt_tokens: int
+    completion_tokens: int
+    cached_tokens: int
     all_logprobs: _containers.RepeatedCompositeFieldContainer[LogProbs]
     all_hidden_states: _containers.RepeatedCompositeFieldContainer[HiddenStates]
-    def __init__(self, output_ids: _Optional[_Iterable[int]] = ..., output_text: _Optional[str] = ..., finish_reason: _Optional[_Union[GenerateComplete.FinishReason, str]] = ..., all_logprobs: _Optional[_Iterable[_Union[LogProbs, _Mapping]]] = ..., all_hidden_states: _Optional[_Iterable[_Union[HiddenStates, _Mapping]]] = ...) -> None: ...
+    def __init__(self, output_ids: _Optional[_Iterable[int]] = ..., finish_reason: _Optional[_Union[GenerateComplete.FinishReason, str]] = ..., prompt_tokens: _Optional[int] = ..., completion_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., all_logprobs: _Optional[_Iterable[_Union[LogProbs, _Mapping]]] = ..., all_hidden_states: _Optional[_Iterable[_Union[HiddenStates, _Mapping]]] = ...) -> None: ...

 class GenerateError(_message.Message):
     __slots__ = ("message", "http_status_code", "details")
...
@@ -283,20 +281,18 @@ class EmbedResponse(_message.Message):
     def __init__(self, request_id: _Optional[str] = ..., complete: _Optional[_Union[EmbedComplete, _Mapping]] = ..., error: _Optional[_Union[EmbedError, _Mapping]] = ...) -> None: ...

 class EmbedComplete(_message.Message):
-    __slots__ = ("embedding", "prompt_tokens", "cached_tokens", "embedding_dim", "generation_time", "batch_embeddings")
+    __slots__ = ("embedding", "prompt_tokens", "cached_tokens", "embedding_dim", "batch_embeddings")
     EMBEDDING_FIELD_NUMBER: _ClassVar[int]
     PROMPT_TOKENS_FIELD_NUMBER: _ClassVar[int]
     CACHED_TOKENS_FIELD_NUMBER: _ClassVar[int]
     EMBEDDING_DIM_FIELD_NUMBER: _ClassVar[int]
-    GENERATION_TIME_FIELD_NUMBER: _ClassVar[int]
     BATCH_EMBEDDINGS_FIELD_NUMBER: _ClassVar[int]
     embedding: _containers.RepeatedScalarFieldContainer[float]
     prompt_tokens: int
     cached_tokens: int
     embedding_dim: int
-    generation_time: float
     batch_embeddings: _containers.RepeatedCompositeFieldContainer[Embedding]
-    def __init__(self, embedding: _Optional[_Iterable[float]] = ..., prompt_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., embedding_dim: _Optional[int] = ..., generation_time: _Optional[float] = ..., batch_embeddings: _Optional[_Iterable[_Union[Embedding, _Mapping]]] = ...) -> None: ...
+    def __init__(self, embedding: _Optional[_Iterable[float]] = ..., prompt_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., embedding_dim: _Optional[int] = ..., batch_embeddings: _Optional[_Iterable[_Union[Embedding, _Mapping]]] = ...) -> None: ...

 class Embedding(_message.Message):
     __slots__ = ("values", "index")
...
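The stub changes are what type checkers see. With the updated .pyi, constructor calls like the following type-check, while keyword arguments such as text=, queue_time=, or generation_time= are now rejected (a small illustration, not code from the commit):

from sglang.srt.grpc import sglang_scheduler_pb2 as pb

chunk = pb.GenerateStreamChunk(token_id=7, prompt_tokens=8, completion_tokens=1, cached_tokens=0)
embed = pb.EmbedComplete(embedding=[0.1, 0.2], prompt_tokens=4, cached_tokens=0, embedding_dim=2)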
sgl-router/src/proto/sglang_scheduler.proto

@@ -165,28 +165,22 @@ message GenerateResponse {
 message GenerateStreamChunk {
   // Generated token
   int32 token_id = 1;
-  string text = 2;
   // Cumulative counts
-  int32 prompt_tokens = 3;
+  int32 prompt_tokens = 2;
-  int32 completion_tokens = 4;
+  int32 completion_tokens = 3;
-  int32 cached_tokens = 5;
+  int32 cached_tokens = 4;
   // Logprobs (if requested)
-  LogProbs logprobs = 6;
+  LogProbs logprobs = 5;
   // Hidden states (if requested)
-  repeated float hidden_states = 7;
+  repeated float hidden_states = 6;
-  // Metadata
-  float generation_time = 8;  // Time to generate this token
-  int32 queue_time = 9;       // Time spent in queue
 }

 message GenerateComplete {
   // Final output
   repeated int32 output_ids = 1;
-  string output_text = 2;
   // Finish reason
   enum FinishReason {
...
@@ -201,13 +195,18 @@ message GenerateComplete {
     // The request was aborted by the user or system.
     ABORT = 4;
   }
-  FinishReason finish_reason = 3;
+  FinishReason finish_reason = 2;
+
+  // Token usage counts
+  int32 prompt_tokens = 3;
+  int32 completion_tokens = 4;
+  int32 cached_tokens = 5;
   // All logprobs if requested
-  repeated LogProbs all_logprobs = 11;
+  repeated LogProbs all_logprobs = 6;
   // All hidden states if requested
-  repeated HiddenStates all_hidden_states = 12;
+  repeated HiddenStates all_hidden_states = 7;
 }

 message GenerateError {
...
@@ -285,10 +284,9 @@ message EmbedComplete {
   // Additional metadata
   int32 embedding_dim = 4;
-  float generation_time = 5;
   // For batch embeddings
-  repeated Embedding batch_embeddings = 6;
+  repeated Embedding batch_embeddings = 5;
 }

 message Embedding {
...
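The router keeps its own copy of the schema, and this commit applies the identical edits to both files. If the two copies are intended to stay byte-for-byte identical (an assumption, not stated in the commit), a simple guard could be:

import filecmp

assert filecmp.cmp(
    "python/sglang/srt/grpc/sglang_scheduler.proto",
    "sgl-router/src/proto/sglang_scheduler.proto",
    shallow=False,
), "proto copies for the Python server and the Rust router have diverged"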