Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
a578d300
Unverified
Commit
a578d300
authored
Oct 06, 2025
by
Chang Su
Committed by
GitHub
Oct 06, 2025
Browse files
[router][grpc] Fix proto3 default value mismatches and cleanup unused fields (#11283)
parent
8c967037
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
153 additions
and
117 deletions
+153
-117
python/sglang/srt/entrypoints/grpc_server.py
python/sglang/srt/entrypoints/grpc_server.py
+39
-14
python/sglang/srt/grpc/sglang_scheduler.proto
python/sglang/srt/grpc/sglang_scheduler.proto
+12
-13
python/sglang/srt/grpc/sglang_scheduler_pb2.py
python/sglang/srt/grpc/sglang_scheduler_pb2.py
+70
-70
python/sglang/srt/grpc/sglang_scheduler_pb2.pyi
python/sglang/srt/grpc/sglang_scheduler_pb2.pyi
+2
-6
sgl-router/src/grpc_client/sglang_scheduler.rs
sgl-router/src/grpc_client/sglang_scheduler.rs
+18
-1
sgl-router/src/proto/sglang_scheduler.proto
sgl-router/src/proto/sglang_scheduler.proto
+12
-13
No files found.
python/sglang/srt/entrypoints/grpc_server.py
View file @
a578d300
...
...
@@ -14,6 +14,7 @@ from concurrent import futures
from
typing
import
AsyncIterator
,
Dict
,
Optional
,
Tuple
import
grpc
from
google.protobuf.json_format
import
MessageToDict
from
grpc_reflection.v1alpha
import
reflection
from
sglang.srt.disaggregation.utils
import
FAKE_BOOTSTRAP_HOST
,
DisaggregationMode
...
...
@@ -483,28 +484,52 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
elif
grpc_params
.
HasField
(
"structural_tag"
):
structural_tag
=
grpc_params
.
structural_tag
# Handle optional parameters conversion
custom_params
=
(
MessageToDict
(
grpc_params
.
custom_params
)
if
grpc_params
.
HasField
(
"custom_params"
)
else
None
)
max_new_tokens
=
(
grpc_params
.
max_new_tokens
if
grpc_params
.
HasField
(
"max_new_tokens"
)
else
None
)
stream_interval
=
(
grpc_params
.
stream_interval
if
grpc_params
.
HasField
(
"stream_interval"
)
else
None
)
logit_bias
=
dict
(
grpc_params
.
logit_bias
)
if
grpc_params
.
logit_bias
else
None
stop
=
list
(
grpc_params
.
stop
)
if
grpc_params
.
stop
else
None
stop_token_ids
=
(
list
(
grpc_params
.
stop_token_ids
)
if
grpc_params
.
stop_token_ids
else
None
)
return
SGLSamplingParams
(
temperature
=
grpc_params
.
temperature
or
1.0
,
top_p
=
grpc_params
.
top_p
or
1.0
,
top_k
=
grpc_params
.
top_k
or
-
1
,
min_p
=
grpc_params
.
min_p
or
0.0
,
frequency_penalty
=
grpc_params
.
frequency_penalty
or
0.0
,
presence_penalty
=
grpc_params
.
presence_penalty
or
0.0
,
repetition_penalty
=
grpc_params
.
repetition_penalty
or
1.0
,
max_new_tokens
=
grpc_params
.
max_new_tokens
or
128
,
min_new_tokens
=
grpc_params
.
min_new_tokens
or
0
,
stop
=
list
(
grpc_params
.
stop
)
if
grpc_params
.
stop
else
[],
stop_token_ids
=
(
list
(
grpc_params
.
stop_token_ids
)
if
grpc_params
.
stop_token_ids
else
[]
),
temperature
=
grpc_params
.
temperature
,
top_p
=
grpc_params
.
top_p
,
top_k
=
grpc_params
.
top_k
,
min_p
=
grpc_params
.
min_p
,
frequency_penalty
=
grpc_params
.
frequency_penalty
,
presence_penalty
=
grpc_params
.
presence_penalty
,
repetition_penalty
=
grpc_params
.
repetition_penalty
,
max_new_tokens
=
max_new_tokens
,
min_new_tokens
=
grpc_params
.
min_new_tokens
,
stop
=
stop
,
stop_token_ids
=
stop_token_ids
,
skip_special_tokens
=
grpc_params
.
skip_special_tokens
,
spaces_between_special_tokens
=
grpc_params
.
spaces_between_special_tokens
,
no_stop_trim
=
grpc_params
.
no_stop_trim
,
regex
=
regex
,
json_schema
=
json_schema
,
ebnf
=
ebnf_grammar
,
structural_tag
=
structural_tag
,
n
=
grpc_params
.
n
or
1
,
n
=
grpc_params
.
n
,
ignore_eos
=
grpc_params
.
ignore_eos
,
stream_interval
=
stream_interval
,
logit_bias
=
logit_bias
,
custom_params
=
custom_params
,
)
def
_convert_output_logprobs_to_proto
(
...
...
python/sglang/srt/grpc/sglang_scheduler.proto
View file @
a578d300
...
...
@@ -27,6 +27,11 @@ service SglangScheduler {
// =====================
// Sampling parameters matching SGLang's SamplingParams
//
// IMPORTANT: Do not use SamplingParams::default() directly!
// The proto3 defaults (0 for numeric fields) do NOT match the semantic defaults
// (temperature=1.0, top_p=1.0, top_k=-1, etc.). Always construct with explicit values
// or use the conversion functions in sglang_scheduler.rs / grpc_server.py.
message
SamplingParams
{
float
temperature
=
1
;
float
top_p
=
2
;
...
...
@@ -50,24 +55,18 @@ message SamplingParams {
string
structural_tag
=
16
;
}
// LoRA adapter
string
lora_path
=
17
;
// Speculative decoding
int32
n
=
18
;
// Number of samples
// Token healing
bool
token_healing
=
19
;
int32
n
=
17
;
// Number of samples
// Additional parameters
int32
min_new_tokens
=
20
;
bool
ignore_eos
=
2
1
;
bool
no_stop_trim
=
2
2
;
int32
stream_interval
=
2
3
;
map
<
string
,
float
>
logit_bias
=
2
4
;
int32
min_new_tokens
=
18
;
bool
ignore_eos
=
1
9
;
bool
no_stop_trim
=
2
0
;
optional
int32
stream_interval
=
2
1
;
map
<
string
,
float
>
logit_bias
=
2
2
;
// Custom parameters for extensibility
google.protobuf.Struct
custom_params
=
2
5
;
google.protobuf.Struct
custom_params
=
2
3
;
}
...
...
python/sglang/srt/grpc/sglang_scheduler_pb2.py
View file @
a578d300
...
...
@@ -29,7 +29,7 @@ from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__
from
google.protobuf
import
struct_pb2
as
google_dot_protobuf_dot_struct__pb2
DESCRIPTOR
=
_descriptor_pool
.
Default
().
AddSerializedFile
(
b
'
\n\x16
sglang_scheduler.proto
\x12\x15
sglang.grpc.scheduler
\x1a\x1f
google/protobuf/timestamp.proto
\x1a\x1c
google/protobuf/struct.proto
\"\x
e1
\x05\n\x0e
SamplingParams
\x12\x13\n\x0b
temperature
\x18\x01
\x01
(
\x02\x12\r\n\x05
top_p
\x18\x02
\x01
(
\x02\x12\r\n\x05
top_k
\x18\x03
\x01
(
\x05\x12\r\n\x05
min_p
\x18\x04
\x01
(
\x02\x12\x19\n\x11\x66
requency_penalty
\x18\x05
\x01
(
\x02\x12\x18\n\x10
presence_penalty
\x18\x06
\x01
(
\x02\x12\x1a\n\x12
repetition_penalty
\x18\x07
\x01
(
\x02\x12\x1b\n\x0e
max_new_tokens
\x18\x08
\x01
(
\x05
H
\x01\x88\x01\x01\x12\x0c\n\x04
stop
\x18\t
\x03
(
\t\x12\x16\n\x0e
stop_token_ids
\x18\n
\x03
(
\r\x12\x1b\n\x13
skip_special_tokens
\x18\x0b
\x01
(
\x08\x12
%
\n\x1d
spaces_between_special_tokens
\x18\x0c
\x01
(
\x08\x12\x0f\n\x05
regex
\x18\r
\x01
(
\t
H
\x00\x12\x15\n\x0b
json_schema
\x18\x0e
\x01
(
\t
H
\x00\x12\x16\n\x0c\x65\x62
nf_grammar
\x18\x0f
\x01
(
\t
H
\x00\x12\x18\n\x0e
structural_tag
\x18\x10
\x01
(
\t
H
\x00\x12\
x11\n\t
lora_path
\x18\x11
\x01
(
\t\x12\t\n\x01
n
\x18\x12
\x01
(
\x05\x12\x15\n\r
token_healing
\x18\x13
\x01
(
\x0
8
\x12\x16\n\x0e
min_new_tokens
\x18\x1
4
\x01
(
\x05\x12\x12\n\n
ignore_eos
\x18\x1
5
\x01
(
\x08\x12\x14\n\x0c
no_stop_trim
\x18\x1
6
\x01
(
\x08\x12\x1
7
\n\x0f
stream_interval
\x18\x1
7
\x01
(
\x05\x12
H
\n\n
logit_bias
\x18\x1
8
\x03
(
\x0b\x32\x34
.sglang.grpc.scheduler.SamplingParams.LogitBiasEntry
\x12
.
\n\r
custom_params
\x18\x1
9
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\x1a\x30\n\x0e
LogitBiasEntry
\x12\x0b\n\x03
key
\x18\x01
\x01
(
\t\x12\r\n\x05
value
\x18\x02
\x01
(
\x02
:
\x02\x38\x01\x42\x0c\n\n
constraintB
\x11\n\x0f
_max_new_tokens
\"
]
\n\x13\x44
isaggregatedParams
\x12\x16\n\x0e\x62
ootstrap_host
\x18\x01
\x01
(
\t\x12\x16\n\x0e\x62
ootstrap_port
\x18\x02
\x01
(
\x05\x12\x16\n\x0e\x62
ootstrap_room
\x18\x03
\x01
(
\x05\"\xe2\x04\n\x0f
GenerateRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\t
tokenized
\x18\x02
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\x12
:
\n\t
mm_inputs
\x18\x03
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.MultimodalInputs
\x12
>
\n\x0f
sampling_params
\x18\x04
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.SamplingParams
\x12\x16\n\x0e
return_logprob
\x18\x05
\x01
(
\x08\x12\x19\n\x11
logprob_start_len
\x18\x06
\x01
(
\x05\x12\x18\n\x10
top_logprobs_num
\x18\x07
\x01
(
\x05\x12\x19\n\x11
token_ids_logprob
\x18\x08
\x03
(
\r\x12\x1c\n\x14
return_hidden_states
\x18\t
\x01
(
\x08\x12
H
\n\x14\x64
isaggregated_params
\x18\n
\x01
(
\x0b\x32
*.sglang.grpc.scheduler.DisaggregatedParams
\x12\x1e\n\x16\x63
ustom_logit_processor
\x18\x0b
\x01
(
\t\x12
-
\n\t
timestamp
\x18\x0c
\x01
(
\x0b\x32\x1a
.google.protobuf.Timestamp
\x12\x13\n\x0b
log_metrics
\x18\r
\x01
(
\x08\x12\x14\n\x0c
input_embeds
\x18\x0e
\x03
(
\x02\x12\x0f\n\x07
lora_id
\x18\x0f
\x01
(
\t\x12\x1a\n\x12\x64\x61
ta_parallel_rank
\x18\x10
\x01
(
\x05\x12\x0e\n\x06
stream
\x18\x11
\x01
(
\x08\"
:
\n\x0e
TokenizedInput
\x12\x15\n\r
original_text
\x18\x01
\x01
(
\t\x12\x11\n\t
input_ids
\x18\x02
\x03
(
\r\"\xd3\x01\n\x10
MultimodalInputs
\x12\x12\n\n
image_urls
\x18\x01
\x03
(
\t\x12\x12\n\n
video_urls
\x18\x02
\x03
(
\t\x12\x12\n\n
audio_urls
\x18\x03
\x03
(
\t\x12\x33\n\x12
processed_features
\x18\x04
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\x12\x12\n\n
image_data
\x18\x05
\x03
(
\x0c\x12\x12\n\n
video_data
\x18\x06
\x03
(
\x0c\x12\x12\n\n
audio_data
\x18\x07
\x03
(
\x0c\x12\x12\n\n
modalities
\x18\x08
\x03
(
\t\"\xe3\x01\n\x10
GenerateResponse
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12
;
\n\x05\x63
hunk
\x18\x02
\x01
(
\x0b\x32
*.sglang.grpc.scheduler.GenerateStreamChunkH
\x00\x12
;
\n\x08\x63
omplete
\x18\x03
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.GenerateCompleteH
\x00\x12\x35\n\x05\x65
rror
\x18\x04
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.GenerateErrorH
\x00\x42\n\n\x08
response
\"\x95\x02\n\x13
GenerateStreamChunk
\x12\x11\n\t
token_ids
\x18\x01
\x03
(
\r\x12\x15\n\r
prompt_tokens
\x18\x02
\x01
(
\x05\x12\x19\n\x11\x63
ompletion_tokens
\x18\x03
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x04
\x01
(
\x05\x12
>
\n\x0f
output_logprobs
\x18\x05
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.OutputLogProbs
\x12\x15\n\r
hidden_states
\x18\x06
\x03
(
\x02\x12
<
\n\x0e
input_logprobs
\x18\x07
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.InputLogProbs
\x12\r\n\x05
index
\x18\x08
\x01
(
\r\"\x9b\x03\n\x10
GenerateComplete
\x12\x12\n\n
output_ids
\x18\x01
\x03
(
\r\x12\x15\n\r
finish_reason
\x18\x02
\x01
(
\t\x12\x15\n\r
prompt_tokens
\x18\x03
\x01
(
\x05\x12\x19\n\x11\x63
ompletion_tokens
\x18\x04
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x05
\x01
(
\x05\x12
>
\n\x0f
output_logprobs
\x18\x06
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.OutputLogProbs
\x12
>
\n\x11\x61
ll_hidden_states
\x18\x07
\x03
(
\x0b\x32
#.sglang.grpc.scheduler.HiddenStates
\x12\x1a\n\x10
matched_token_id
\x18\x08
\x01
(
\r
H
\x00\x12\x1a\n\x10
matched_stop_str
\x18\t
\x01
(
\t
H
\x00\x12
<
\n\x0e
input_logprobs
\x18\n
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.InputLogProbs
\x12\r\n\x05
index
\x18\x0b
\x01
(
\r
B
\x0e\n\x0c
matched_stop
\"
K
\n\r
GenerateError
\x12\x0f\n\x07
message
\x18\x01
\x01
(
\t\x12\x18\n\x10
http_status_code
\x18\x02
\x01
(
\t\x12\x0f\n\x07\x64\x65
tails
\x18\x03
\x01
(
\t\"
u
\n\x0e
OutputLogProbs
\x12\x16\n\x0e
token_logprobs
\x18\x01
\x03
(
\x02\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\x12\x38\n\x0c
top_logprobs
\x18\x03
\x03
(
\x0b\x32\"
.sglang.grpc.scheduler.TopLogProbs
\"\x9e\x01\n\r
InputLogProbs
\x12
@
\n\x0e
token_logprobs
\x18\x01
\x03
(
\x0b\x32
(.sglang.grpc.scheduler.InputTokenLogProb
\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\x12\x38\n\x0c
top_logprobs
\x18\x03
\x03
(
\x0b\x32\"
.sglang.grpc.scheduler.TopLogProbs
\"
1
\n\x11
InputTokenLogProb
\x12\x12\n\x05
value
\x18\x01
\x01
(
\x02
H
\x00\x88\x01\x01\x42\x08\n\x06
_value
\"
0
\n\x0b
TopLogProbs
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\"
?
\n\x0c
HiddenStates
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\r\n\x05
layer
\x18\x02
\x01
(
\x05\x12\x10\n\x08
position
\x18\x03
\x01
(
\x05\"\xca\x02\n\x0c\x45
mbedRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\t
tokenized
\x18\x02
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\x12
:
\n\t
mm_inputs
\x18\x04
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.MultimodalInputs
\x12
>
\n\x0f
sampling_params
\x18\x05
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.SamplingParams
\x12\x13\n\x0b
log_metrics
\x18\x06
\x01
(
\x08\x12\x16\n\x0e
token_type_ids
\x18\x07
\x03
(
\x05\x12\x1a\n\x12\x64\x61
ta_parallel_rank
\x18\x08
\x01
(
\x05\x12\x18\n\x10
is_cross_encoder
\x18\t
\x01
(
\x08\x12\r\n\x05
texts
\x18\n
\x03
(
\t\"\x9d\x01\n\r
EmbedResponse
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\x08\x63
omplete
\x18\x02
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.EmbedCompleteH
\x00\x12\x32\n\x05\x65
rror
\x18\x03
\x01
(
\x0b\x32
!.sglang.grpc.scheduler.EmbedErrorH
\x00\x42\n\n\x08
response
\"\xa3\x01\n\r
EmbedComplete
\x12\x11\n\t
embedding
\x18\x01
\x03
(
\x02\x12\x15\n\r
prompt_tokens
\x18\x02
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x03
\x01
(
\x05\x12\x15\n\r
embedding_dim
\x18\x04
\x01
(
\x05\x12
:
\n\x10\x62\x61
tch_embeddings
\x18\x05
\x03
(
\x0b\x32
.sglang.grpc.scheduler.Embedding
\"
*
\n\t
Embedding
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\r\n\x05
index
\x18\x02
\x01
(
\x05\"
<
\n\n
EmbedError
\x12\x0f\n\x07
message
\x18\x01
\x01
(
\t\x12\x0c\n\x04\x63
ode
\x18\x02
\x01
(
\t\x12\x0f\n\x07\x64\x65
tails
\x18\x03
\x01
(
\t\"
N
\n\x12
HealthCheckRequest
\x12\x38\n\t
tokenized
\x18\x01
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\"
7
\n\x13
HealthCheckResponse
\x12\x0f\n\x07
healthy
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
2
\n\x0c\x41\x62
ortRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x0e\n\x06
reason
\x18\x02
\x01
(
\t\"
1
\n\r
AbortResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
I
\n\x0f
LoadLoRARequest
\x12\x12\n\n
adapter_id
\x18\x01
\x01
(
\t\x12\x14\n\x0c\x61\x64\x61
pter_path
\x18\x02
\x01
(
\t\x12\x0c\n\x04
rank
\x18\x03
\x01
(
\x05\"
H
\n\x10
LoadLoRAResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x12\n\n
adapter_id
\x18\x02
\x01
(
\t\x12\x0f\n\x07
message
\x18\x03
\x01
(
\t\"\'\n\x11
UnloadLoRARequest
\x12\x12\n\n
adapter_id
\x18\x01
\x01
(
\t\"
6
\n\x12
UnloadLoRAResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
w
\n\x14
UpdateWeightsRequest
\x12\x13\n\t
disk_path
\x18\x01
\x01
(
\t
H
\x00\x12\x15\n\x0b
tensor_data
\x18\x02
\x01
(
\x0c
H
\x00\x12\x14\n\n
remote_url
\x18\x03
\x01
(
\t
H
\x00\x12\x13\n\x0b
weight_name
\x18\x04
\x01
(
\t
B
\x08\n\x06
source
\"
9
\n\x15
UpdateWeightsResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
-
\n\x17
GetInternalStateRequest
\x12\x12\n\n
state_keys
\x18\x01
\x03
(
\t\"
B
\n\x18
GetInternalStateResponse
\x12
&
\n\x05
state
\x18\x01
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\"
A
\n\x17
SetInternalStateRequest
\x12
&
\n\x05
state
\x18\x01
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\"
<
\n\x18
SetInternalStateResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t
2
\xfe\x02\n\x0f
SglangScheduler
\x12
]
\n\x08
Generate
\x12
&.sglang.grpc.scheduler.GenerateRequest
\x1a\'
.sglang.grpc.scheduler.GenerateResponse0
\x01\x12
R
\n\x05\x45
mbed
\x12
#.sglang.grpc.scheduler.EmbedRequest
\x1a
$.sglang.grpc.scheduler.EmbedResponse
\x12\x64\n\x0b
HealthCheck
\x12
).sglang.grpc.scheduler.HealthCheckRequest
\x1a
*.sglang.grpc.scheduler.HealthCheckResponse
\x12
R
\n\x05\x41\x62
ort
\x12
#.sglang.grpc.scheduler.AbortRequest
\x1a
$.sglang.grpc.scheduler.AbortResponseb
\x06
proto3'
)
DESCRIPTOR
=
_descriptor_pool
.
Default
().
AddSerializedFile
(
b
'
\n\x16
sglang_scheduler.proto
\x12\x15
sglang.grpc.scheduler
\x1a\x1f
google/protobuf/timestamp.proto
\x1a\x1c
google/protobuf/struct.proto
\"\x
d0
\x05\n\x0e
SamplingParams
\x12\x13\n\x0b
temperature
\x18\x01
\x01
(
\x02\x12\r\n\x05
top_p
\x18\x02
\x01
(
\x02\x12\r\n\x05
top_k
\x18\x03
\x01
(
\x05\x12\r\n\x05
min_p
\x18\x04
\x01
(
\x02\x12\x19\n\x11\x66
requency_penalty
\x18\x05
\x01
(
\x02\x12\x18\n\x10
presence_penalty
\x18\x06
\x01
(
\x02\x12\x1a\n\x12
repetition_penalty
\x18\x07
\x01
(
\x02\x12\x1b\n\x0e
max_new_tokens
\x18\x08
\x01
(
\x05
H
\x01\x88\x01\x01\x12\x0c\n\x04
stop
\x18\t
\x03
(
\t\x12\x16\n\x0e
stop_token_ids
\x18\n
\x03
(
\r\x12\x1b\n\x13
skip_special_tokens
\x18\x0b
\x01
(
\x08\x12
%
\n\x1d
spaces_between_special_tokens
\x18\x0c
\x01
(
\x08\x12\x0f\n\x05
regex
\x18\r
\x01
(
\t
H
\x00\x12\x15\n\x0b
json_schema
\x18\x0e
\x01
(
\t
H
\x00\x12\x16\n\x0c\x65\x62
nf_grammar
\x18\x0f
\x01
(
\t
H
\x00\x12\x18\n\x0e
structural_tag
\x18\x10
\x01
(
\t
H
\x00\x12\
t\n\x01
n
\x18\x11
\x01
(
\x0
5
\x12\x16\n\x0e
min_new_tokens
\x18\x1
2
\x01
(
\x05\x12\x12\n\n
ignore_eos
\x18\x1
3
\x01
(
\x08\x12\x14\n\x0c
no_stop_trim
\x18\x1
4
\x01
(
\x08\x12\x1
c
\n\x0f
stream_interval
\x18\x1
5
\x01
(
\x05
H
\x02\x88\x01\x01
\x12
H
\n\n
logit_bias
\x18\x1
6
\x03
(
\x0b\x32\x34
.sglang.grpc.scheduler.SamplingParams.LogitBiasEntry
\x12
.
\n\r
custom_params
\x18\x1
7
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\x1a\x30\n\x0e
LogitBiasEntry
\x12\x0b\n\x03
key
\x18\x01
\x01
(
\t\x12\r\n\x05
value
\x18\x02
\x01
(
\x02
:
\x02\x38\x01\x42\x0c\n\n
constraintB
\x11\n\x0f
_max_new_tokens
B
\x12\n\x10
_stream_interval
\"
]
\n\x13\x44
isaggregatedParams
\x12\x16\n\x0e\x62
ootstrap_host
\x18\x01
\x01
(
\t\x12\x16\n\x0e\x62
ootstrap_port
\x18\x02
\x01
(
\x05\x12\x16\n\x0e\x62
ootstrap_room
\x18\x03
\x01
(
\x05\"\xe2\x04\n\x0f
GenerateRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\t
tokenized
\x18\x02
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\x12
:
\n\t
mm_inputs
\x18\x03
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.MultimodalInputs
\x12
>
\n\x0f
sampling_params
\x18\x04
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.SamplingParams
\x12\x16\n\x0e
return_logprob
\x18\x05
\x01
(
\x08\x12\x19\n\x11
logprob_start_len
\x18\x06
\x01
(
\x05\x12\x18\n\x10
top_logprobs_num
\x18\x07
\x01
(
\x05\x12\x19\n\x11
token_ids_logprob
\x18\x08
\x03
(
\r\x12\x1c\n\x14
return_hidden_states
\x18\t
\x01
(
\x08\x12
H
\n\x14\x64
isaggregated_params
\x18\n
\x01
(
\x0b\x32
*.sglang.grpc.scheduler.DisaggregatedParams
\x12\x1e\n\x16\x63
ustom_logit_processor
\x18\x0b
\x01
(
\t\x12
-
\n\t
timestamp
\x18\x0c
\x01
(
\x0b\x32\x1a
.google.protobuf.Timestamp
\x12\x13\n\x0b
log_metrics
\x18\r
\x01
(
\x08\x12\x14\n\x0c
input_embeds
\x18\x0e
\x03
(
\x02\x12\x0f\n\x07
lora_id
\x18\x0f
\x01
(
\t\x12\x1a\n\x12\x64\x61
ta_parallel_rank
\x18\x10
\x01
(
\x05\x12\x0e\n\x06
stream
\x18\x11
\x01
(
\x08\"
:
\n\x0e
TokenizedInput
\x12\x15\n\r
original_text
\x18\x01
\x01
(
\t\x12\x11\n\t
input_ids
\x18\x02
\x03
(
\r\"\xd3\x01\n\x10
MultimodalInputs
\x12\x12\n\n
image_urls
\x18\x01
\x03
(
\t\x12\x12\n\n
video_urls
\x18\x02
\x03
(
\t\x12\x12\n\n
audio_urls
\x18\x03
\x03
(
\t\x12\x33\n\x12
processed_features
\x18\x04
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\x12\x12\n\n
image_data
\x18\x05
\x03
(
\x0c\x12\x12\n\n
video_data
\x18\x06
\x03
(
\x0c\x12\x12\n\n
audio_data
\x18\x07
\x03
(
\x0c\x12\x12\n\n
modalities
\x18\x08
\x03
(
\t\"\xe3\x01\n\x10
GenerateResponse
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12
;
\n\x05\x63
hunk
\x18\x02
\x01
(
\x0b\x32
*.sglang.grpc.scheduler.GenerateStreamChunkH
\x00\x12
;
\n\x08\x63
omplete
\x18\x03
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.GenerateCompleteH
\x00\x12\x35\n\x05\x65
rror
\x18\x04
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.GenerateErrorH
\x00\x42\n\n\x08
response
\"\x95\x02\n\x13
GenerateStreamChunk
\x12\x11\n\t
token_ids
\x18\x01
\x03
(
\r\x12\x15\n\r
prompt_tokens
\x18\x02
\x01
(
\x05\x12\x19\n\x11\x63
ompletion_tokens
\x18\x03
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x04
\x01
(
\x05\x12
>
\n\x0f
output_logprobs
\x18\x05
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.OutputLogProbs
\x12\x15\n\r
hidden_states
\x18\x06
\x03
(
\x02\x12
<
\n\x0e
input_logprobs
\x18\x07
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.InputLogProbs
\x12\r\n\x05
index
\x18\x08
\x01
(
\r\"\x9b\x03\n\x10
GenerateComplete
\x12\x12\n\n
output_ids
\x18\x01
\x03
(
\r\x12\x15\n\r
finish_reason
\x18\x02
\x01
(
\t\x12\x15\n\r
prompt_tokens
\x18\x03
\x01
(
\x05\x12\x19\n\x11\x63
ompletion_tokens
\x18\x04
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x05
\x01
(
\x05\x12
>
\n\x0f
output_logprobs
\x18\x06
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.OutputLogProbs
\x12
>
\n\x11\x61
ll_hidden_states
\x18\x07
\x03
(
\x0b\x32
#.sglang.grpc.scheduler.HiddenStates
\x12\x1a\n\x10
matched_token_id
\x18\x08
\x01
(
\r
H
\x00\x12\x1a\n\x10
matched_stop_str
\x18\t
\x01
(
\t
H
\x00\x12
<
\n\x0e
input_logprobs
\x18\n
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.InputLogProbs
\x12\r\n\x05
index
\x18\x0b
\x01
(
\r
B
\x0e\n\x0c
matched_stop
\"
K
\n\r
GenerateError
\x12\x0f\n\x07
message
\x18\x01
\x01
(
\t\x12\x18\n\x10
http_status_code
\x18\x02
\x01
(
\t\x12\x0f\n\x07\x64\x65
tails
\x18\x03
\x01
(
\t\"
u
\n\x0e
OutputLogProbs
\x12\x16\n\x0e
token_logprobs
\x18\x01
\x03
(
\x02\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\x12\x38\n\x0c
top_logprobs
\x18\x03
\x03
(
\x0b\x32\"
.sglang.grpc.scheduler.TopLogProbs
\"\x9e\x01\n\r
InputLogProbs
\x12
@
\n\x0e
token_logprobs
\x18\x01
\x03
(
\x0b\x32
(.sglang.grpc.scheduler.InputTokenLogProb
\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\x12\x38\n\x0c
top_logprobs
\x18\x03
\x03
(
\x0b\x32\"
.sglang.grpc.scheduler.TopLogProbs
\"
1
\n\x11
InputTokenLogProb
\x12\x12\n\x05
value
\x18\x01
\x01
(
\x02
H
\x00\x88\x01\x01\x42\x08\n\x06
_value
\"
0
\n\x0b
TopLogProbs
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\x11\n\t
token_ids
\x18\x02
\x03
(
\x05\"
?
\n\x0c
HiddenStates
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\r\n\x05
layer
\x18\x02
\x01
(
\x05\x12\x10\n\x08
position
\x18\x03
\x01
(
\x05\"\xca\x02\n\x0c\x45
mbedRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\t
tokenized
\x18\x02
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\x12
:
\n\t
mm_inputs
\x18\x04
\x01
(
\x0b\x32\'
.sglang.grpc.scheduler.MultimodalInputs
\x12
>
\n\x0f
sampling_params
\x18\x05
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.SamplingParams
\x12\x13\n\x0b
log_metrics
\x18\x06
\x01
(
\x08\x12\x16\n\x0e
token_type_ids
\x18\x07
\x03
(
\x05\x12\x1a\n\x12\x64\x61
ta_parallel_rank
\x18\x08
\x01
(
\x05\x12\x18\n\x10
is_cross_encoder
\x18\t
\x01
(
\x08\x12\r\n\x05
texts
\x18\n
\x03
(
\t\"\x9d\x01\n\r
EmbedResponse
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x38\n\x08\x63
omplete
\x18\x02
\x01
(
\x0b\x32
$.sglang.grpc.scheduler.EmbedCompleteH
\x00\x12\x32\n\x05\x65
rror
\x18\x03
\x01
(
\x0b\x32
!.sglang.grpc.scheduler.EmbedErrorH
\x00\x42\n\n\x08
response
\"\xa3\x01\n\r
EmbedComplete
\x12\x11\n\t
embedding
\x18\x01
\x03
(
\x02\x12\x15\n\r
prompt_tokens
\x18\x02
\x01
(
\x05\x12\x15\n\r
cached_tokens
\x18\x03
\x01
(
\x05\x12\x15\n\r
embedding_dim
\x18\x04
\x01
(
\x05\x12
:
\n\x10\x62\x61
tch_embeddings
\x18\x05
\x03
(
\x0b\x32
.sglang.grpc.scheduler.Embedding
\"
*
\n\t
Embedding
\x12\x0e\n\x06
values
\x18\x01
\x03
(
\x02\x12\r\n\x05
index
\x18\x02
\x01
(
\x05\"
<
\n\n
EmbedError
\x12\x0f\n\x07
message
\x18\x01
\x01
(
\t\x12\x0c\n\x04\x63
ode
\x18\x02
\x01
(
\t\x12\x0f\n\x07\x64\x65
tails
\x18\x03
\x01
(
\t\"
N
\n\x12
HealthCheckRequest
\x12\x38\n\t
tokenized
\x18\x01
\x01
(
\x0b\x32
%.sglang.grpc.scheduler.TokenizedInput
\"
7
\n\x13
HealthCheckResponse
\x12\x0f\n\x07
healthy
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
2
\n\x0c\x41\x62
ortRequest
\x12\x12\n\n
request_id
\x18\x01
\x01
(
\t\x12\x0e\n\x06
reason
\x18\x02
\x01
(
\t\"
1
\n\r
AbortResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
I
\n\x0f
LoadLoRARequest
\x12\x12\n\n
adapter_id
\x18\x01
\x01
(
\t\x12\x14\n\x0c\x61\x64\x61
pter_path
\x18\x02
\x01
(
\t\x12\x0c\n\x04
rank
\x18\x03
\x01
(
\x05\"
H
\n\x10
LoadLoRAResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x12\n\n
adapter_id
\x18\x02
\x01
(
\t\x12\x0f\n\x07
message
\x18\x03
\x01
(
\t\"\'\n\x11
UnloadLoRARequest
\x12\x12\n\n
adapter_id
\x18\x01
\x01
(
\t\"
6
\n\x12
UnloadLoRAResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
w
\n\x14
UpdateWeightsRequest
\x12\x13\n\t
disk_path
\x18\x01
\x01
(
\t
H
\x00\x12\x15\n\x0b
tensor_data
\x18\x02
\x01
(
\x0c
H
\x00\x12\x14\n\n
remote_url
\x18\x03
\x01
(
\t
H
\x00\x12\x13\n\x0b
weight_name
\x18\x04
\x01
(
\t
B
\x08\n\x06
source
\"
9
\n\x15
UpdateWeightsResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t\"
-
\n\x17
GetInternalStateRequest
\x12\x12\n\n
state_keys
\x18\x01
\x03
(
\t\"
B
\n\x18
GetInternalStateResponse
\x12
&
\n\x05
state
\x18\x01
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\"
A
\n\x17
SetInternalStateRequest
\x12
&
\n\x05
state
\x18\x01
\x01
(
\x0b\x32\x17
.google.protobuf.Struct
\"
<
\n\x18
SetInternalStateResponse
\x12\x0f\n\x07
success
\x18\x01
\x01
(
\x08\x12\x0f\n\x07
message
\x18\x02
\x01
(
\t
2
\xfe\x02\n\x0f
SglangScheduler
\x12
]
\n\x08
Generate
\x12
&.sglang.grpc.scheduler.GenerateRequest
\x1a\'
.sglang.grpc.scheduler.GenerateResponse0
\x01\x12
R
\n\x05\x45
mbed
\x12
#.sglang.grpc.scheduler.EmbedRequest
\x1a
$.sglang.grpc.scheduler.EmbedResponse
\x12\x64\n\x0b
HealthCheck
\x12
).sglang.grpc.scheduler.HealthCheckRequest
\x1a
*.sglang.grpc.scheduler.HealthCheckResponse
\x12
R
\n\x05\x41\x62
ort
\x12
#.sglang.grpc.scheduler.AbortRequest
\x1a
$.sglang.grpc.scheduler.AbortResponseb
\x06
proto3'
)
_globals
=
globals
()
_builder
.
BuildMessageAndEnumDescriptors
(
DESCRIPTOR
,
_globals
)
...
...
@@ -39,73 +39,73 @@ if not _descriptor._USE_C_DESCRIPTORS:
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_loaded_options
=
None
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_options
=
b
'8
\001
'
_globals
[
'_SAMPLINGPARAMS'
].
_serialized_start
=
113
_globals
[
'_SAMPLINGPARAMS'
].
_serialized_end
=
8
50
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_start
=
7
69
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_end
=
817
_globals
[
'_DISAGGREGATEDPARAMS'
].
_serialized_start
=
85
2
_globals
[
'_DISAGGREGATEDPARAMS'
].
_serialized_end
=
9
45
_globals
[
'_GENERATEREQUEST'
].
_serialized_start
=
9
48
_globals
[
'_GENERATEREQUEST'
].
_serialized_end
=
15
58
_globals
[
'_TOKENIZEDINPUT'
].
_serialized_start
=
15
60
_globals
[
'_TOKENIZEDINPUT'
].
_serialized_end
=
161
8
_globals
[
'_MULTIMODALINPUTS'
].
_serialized_start
=
16
21
_globals
[
'_MULTIMODALINPUTS'
].
_serialized_end
=
18
32
_globals
[
'_GENERATERESPONSE'
].
_serialized_start
=
18
35
_globals
[
'_GENERATERESPONSE'
].
_serialized_end
=
20
62
_globals
[
'_GENERATESTREAMCHUNK'
].
_serialized_start
=
20
65
_globals
[
'_GENERATESTREAMCHUNK'
].
_serialized_end
=
23
4
2
_globals
[
'_GENERATECOMPLETE'
].
_serialized_start
=
23
45
_globals
[
'_GENERATECOMPLETE'
].
_serialized_end
=
27
56
_globals
[
'_GENERATEERROR'
].
_serialized_start
=
27
58
_globals
[
'_GENERATEERROR'
].
_serialized_end
=
28
33
_globals
[
'_OUTPUTLOGPROBS'
].
_serialized_start
=
28
35
_globals
[
'_OUTPUTLOGPROBS'
].
_serialized_end
=
295
2
_globals
[
'_INPUTLOGPROBS'
].
_serialized_start
=
29
55
_globals
[
'_INPUTLOGPROBS'
].
_serialized_end
=
3
113
_globals
[
'_INPUTTOKENLOGPROB'
].
_serialized_start
=
3
115
_globals
[
'_INPUTTOKENLOGPROB'
].
_serialized_end
=
31
6
4
_globals
[
'_TOPLOGPROBS'
].
_serialized_start
=
31
66
_globals
[
'_TOPLOGPROBS'
].
_serialized_end
=
3
214
_globals
[
'_HIDDENSTATES'
].
_serialized_start
=
3
216
_globals
[
'_HIDDENSTATES'
].
_serialized_end
=
32
79
_globals
[
'_EMBEDREQUEST'
].
_serialized_start
=
32
82
_globals
[
'_EMBEDREQUEST'
].
_serialized_end
=
3
612
_globals
[
'_EMBEDRESPONSE'
].
_serialized_start
=
3
615
_globals
[
'_EMBEDRESPONSE'
].
_serialized_end
=
37
72
_globals
[
'_EMBEDCOMPLETE'
].
_serialized_start
=
37
7
5
_globals
[
'_EMBEDCOMPLETE'
].
_serialized_end
=
39
38
_globals
[
'_EMBEDDING'
].
_serialized_start
=
39
40
_globals
[
'_EMBEDDING'
].
_serialized_end
=
39
82
_globals
[
'_EMBEDERROR'
].
_serialized_start
=
39
84
_globals
[
'_EMBEDERROR'
].
_serialized_end
=
40
44
_globals
[
'_HEALTHCHECKREQUEST'
].
_serialized_start
=
40
46
_globals
[
'_HEALTHCHECKREQUEST'
].
_serialized_end
=
41
24
_globals
[
'_HEALTHCHECKRESPONSE'
].
_serialized_start
=
41
26
_globals
[
'_HEALTHCHECKRESPONSE'
].
_serialized_end
=
41
81
_globals
[
'_ABORTREQUEST'
].
_serialized_start
=
41
83
_globals
[
'_ABORTREQUEST'
].
_serialized_end
=
42
33
_globals
[
'_ABORTRESPONSE'
].
_serialized_start
=
42
35
_globals
[
'_ABORTRESPONSE'
].
_serialized_end
=
42
84
_globals
[
'_LOADLORAREQUEST'
].
_serialized_start
=
42
8
6
_globals
[
'_LOADLORAREQUEST'
].
_serialized_end
=
43
59
_globals
[
'_LOADLORARESPONSE'
].
_serialized_start
=
43
61
_globals
[
'_LOADLORARESPONSE'
].
_serialized_end
=
44
33
_globals
[
'_UNLOADLORAREQUEST'
].
_serialized_start
=
44
35
_globals
[
'_UNLOADLORAREQUEST'
].
_serialized_end
=
447
4
_globals
[
'_UNLOADLORARESPONSE'
].
_serialized_start
=
44
76
_globals
[
'_UNLOADLORARESPONSE'
].
_serialized_end
=
453
0
_globals
[
'_UPDATEWEIGHTSREQUEST'
].
_serialized_start
=
45
32
_globals
[
'_UPDATEWEIGHTSREQUEST'
].
_serialized_end
=
46
51
_globals
[
'_UPDATEWEIGHTSRESPONSE'
].
_serialized_start
=
46
5
3
_globals
[
'_UPDATEWEIGHTSRESPONSE'
].
_serialized_end
=
4
710
_globals
[
'_GETINTERNALSTATEREQUEST'
].
_serialized_start
=
4
712
_globals
[
'_GETINTERNALSTATEREQUEST'
].
_serialized_end
=
47
57
_globals
[
'_GETINTERNALSTATERESPONSE'
].
_serialized_start
=
47
59
_globals
[
'_GETINTERNALSTATERESPONSE'
].
_serialized_end
=
48
25
_globals
[
'_SETINTERNALSTATEREQUEST'
].
_serialized_start
=
48
27
_globals
[
'_SETINTERNALSTATEREQUEST'
].
_serialized_end
=
48
92
_globals
[
'_SETINTERNALSTATERESPONSE'
].
_serialized_start
=
48
94
_globals
[
'_SETINTERNALSTATERESPONSE'
].
_serialized_end
=
49
54
_globals
[
'_SGLANGSCHEDULER'
].
_serialized_start
=
49
57
_globals
[
'_SGLANGSCHEDULER'
].
_serialized_end
=
53
39
_globals
[
'_SAMPLINGPARAMS'
].
_serialized_end
=
8
33
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_start
=
7
32
_globals
[
'_SAMPLINGPARAMS_LOGITBIASENTRY'
].
_serialized_end
=
780
_globals
[
'_DISAGGREGATEDPARAMS'
].
_serialized_start
=
8
3
5
_globals
[
'_DISAGGREGATEDPARAMS'
].
_serialized_end
=
9
28
_globals
[
'_GENERATEREQUEST'
].
_serialized_start
=
9
31
_globals
[
'_GENERATEREQUEST'
].
_serialized_end
=
15
41
_globals
[
'_TOKENIZEDINPUT'
].
_serialized_start
=
15
43
_globals
[
'_TOKENIZEDINPUT'
].
_serialized_end
=
16
0
1
_globals
[
'_MULTIMODALINPUTS'
].
_serialized_start
=
16
04
_globals
[
'_MULTIMODALINPUTS'
].
_serialized_end
=
18
15
_globals
[
'_GENERATERESPONSE'
].
_serialized_start
=
18
18
_globals
[
'_GENERATERESPONSE'
].
_serialized_end
=
20
45
_globals
[
'_GENERATESTREAMCHUNK'
].
_serialized_start
=
20
48
_globals
[
'_GENERATESTREAMCHUNK'
].
_serialized_end
=
232
5
_globals
[
'_GENERATECOMPLETE'
].
_serialized_start
=
23
28
_globals
[
'_GENERATECOMPLETE'
].
_serialized_end
=
27
39
_globals
[
'_GENERATEERROR'
].
_serialized_start
=
27
41
_globals
[
'_GENERATEERROR'
].
_serialized_end
=
28
16
_globals
[
'_OUTPUTLOGPROBS'
].
_serialized_start
=
28
18
_globals
[
'_OUTPUTLOGPROBS'
].
_serialized_end
=
29
3
5
_globals
[
'_INPUTLOGPROBS'
].
_serialized_start
=
29
38
_globals
[
'_INPUTLOGPROBS'
].
_serialized_end
=
3
096
_globals
[
'_INPUTTOKENLOGPROB'
].
_serialized_start
=
3
098
_globals
[
'_INPUTTOKENLOGPROB'
].
_serialized_end
=
314
7
_globals
[
'_TOPLOGPROBS'
].
_serialized_start
=
31
49
_globals
[
'_TOPLOGPROBS'
].
_serialized_end
=
3
197
_globals
[
'_HIDDENSTATES'
].
_serialized_start
=
3
199
_globals
[
'_HIDDENSTATES'
].
_serialized_end
=
32
62
_globals
[
'_EMBEDREQUEST'
].
_serialized_start
=
32
65
_globals
[
'_EMBEDREQUEST'
].
_serialized_end
=
3
595
_globals
[
'_EMBEDRESPONSE'
].
_serialized_start
=
3
598
_globals
[
'_EMBEDRESPONSE'
].
_serialized_end
=
37
55
_globals
[
'_EMBEDCOMPLETE'
].
_serialized_start
=
375
8
_globals
[
'_EMBEDCOMPLETE'
].
_serialized_end
=
39
21
_globals
[
'_EMBEDDING'
].
_serialized_start
=
39
23
_globals
[
'_EMBEDDING'
].
_serialized_end
=
39
65
_globals
[
'_EMBEDERROR'
].
_serialized_start
=
39
67
_globals
[
'_EMBEDERROR'
].
_serialized_end
=
40
27
_globals
[
'_HEALTHCHECKREQUEST'
].
_serialized_start
=
40
29
_globals
[
'_HEALTHCHECKREQUEST'
].
_serialized_end
=
41
07
_globals
[
'_HEALTHCHECKRESPONSE'
].
_serialized_start
=
41
09
_globals
[
'_HEALTHCHECKRESPONSE'
].
_serialized_end
=
41
64
_globals
[
'_ABORTREQUEST'
].
_serialized_start
=
41
66
_globals
[
'_ABORTREQUEST'
].
_serialized_end
=
42
16
_globals
[
'_ABORTRESPONSE'
].
_serialized_start
=
42
18
_globals
[
'_ABORTRESPONSE'
].
_serialized_end
=
42
67
_globals
[
'_LOADLORAREQUEST'
].
_serialized_start
=
426
9
_globals
[
'_LOADLORAREQUEST'
].
_serialized_end
=
43
42
_globals
[
'_LOADLORARESPONSE'
].
_serialized_start
=
43
44
_globals
[
'_LOADLORARESPONSE'
].
_serialized_end
=
44
16
_globals
[
'_UNLOADLORAREQUEST'
].
_serialized_start
=
44
18
_globals
[
'_UNLOADLORAREQUEST'
].
_serialized_end
=
44
5
7
_globals
[
'_UNLOADLORARESPONSE'
].
_serialized_start
=
44
59
_globals
[
'_UNLOADLORARESPONSE'
].
_serialized_end
=
45
1
3
_globals
[
'_UPDATEWEIGHTSREQUEST'
].
_serialized_start
=
45
15
_globals
[
'_UPDATEWEIGHTSREQUEST'
].
_serialized_end
=
46
34
_globals
[
'_UPDATEWEIGHTSRESPONSE'
].
_serialized_start
=
463
6
_globals
[
'_UPDATEWEIGHTSRESPONSE'
].
_serialized_end
=
4
693
_globals
[
'_GETINTERNALSTATEREQUEST'
].
_serialized_start
=
4
695
_globals
[
'_GETINTERNALSTATEREQUEST'
].
_serialized_end
=
47
40
_globals
[
'_GETINTERNALSTATERESPONSE'
].
_serialized_start
=
47
42
_globals
[
'_GETINTERNALSTATERESPONSE'
].
_serialized_end
=
48
08
_globals
[
'_SETINTERNALSTATEREQUEST'
].
_serialized_start
=
48
10
_globals
[
'_SETINTERNALSTATEREQUEST'
].
_serialized_end
=
48
75
_globals
[
'_SETINTERNALSTATERESPONSE'
].
_serialized_start
=
48
77
_globals
[
'_SETINTERNALSTATERESPONSE'
].
_serialized_end
=
49
37
_globals
[
'_SGLANGSCHEDULER'
].
_serialized_start
=
49
40
_globals
[
'_SGLANGSCHEDULER'
].
_serialized_end
=
53
22
# @@protoc_insertion_point(module_scope)
python/sglang/srt/grpc/sglang_scheduler_pb2.pyi
View file @
a578d300
...
...
@@ -11,7 +11,7 @@ from typing import ClassVar as _ClassVar, Optional as _Optional, Union as _Union
DESCRIPTOR: _descriptor.FileDescriptor
class SamplingParams(_message.Message):
__slots__ = ("temperature", "top_p", "top_k", "min_p", "frequency_penalty", "presence_penalty", "repetition_penalty", "max_new_tokens", "stop", "stop_token_ids", "skip_special_tokens", "spaces_between_special_tokens", "regex", "json_schema", "ebnf_grammar", "structural_tag", "
lora_path", "n", "token_healing
", "min_new_tokens", "ignore_eos", "no_stop_trim", "stream_interval", "logit_bias", "custom_params")
__slots__ = ("temperature", "top_p", "top_k", "min_p", "frequency_penalty", "presence_penalty", "repetition_penalty", "max_new_tokens", "stop", "stop_token_ids", "skip_special_tokens", "spaces_between_special_tokens", "regex", "json_schema", "ebnf_grammar", "structural_tag", "
n
", "min_new_tokens", "ignore_eos", "no_stop_trim", "stream_interval", "logit_bias", "custom_params")
class LogitBiasEntry(_message.Message):
__slots__ = ("key", "value")
KEY_FIELD_NUMBER: _ClassVar[int]
...
...
@@ -35,9 +35,7 @@ class SamplingParams(_message.Message):
JSON_SCHEMA_FIELD_NUMBER: _ClassVar[int]
EBNF_GRAMMAR_FIELD_NUMBER: _ClassVar[int]
STRUCTURAL_TAG_FIELD_NUMBER: _ClassVar[int]
LORA_PATH_FIELD_NUMBER: _ClassVar[int]
N_FIELD_NUMBER: _ClassVar[int]
TOKEN_HEALING_FIELD_NUMBER: _ClassVar[int]
MIN_NEW_TOKENS_FIELD_NUMBER: _ClassVar[int]
IGNORE_EOS_FIELD_NUMBER: _ClassVar[int]
NO_STOP_TRIM_FIELD_NUMBER: _ClassVar[int]
...
...
@@ -60,16 +58,14 @@ class SamplingParams(_message.Message):
json_schema: str
ebnf_grammar: str
structural_tag: str
lora_path: str
n: int
token_healing: bool
min_new_tokens: int
ignore_eos: bool
no_stop_trim: bool
stream_interval: int
logit_bias: _containers.ScalarMap[str, float]
custom_params: _struct_pb2.Struct
def __init__(self, temperature: _Optional[float] = ..., top_p: _Optional[float] = ..., top_k: _Optional[int] = ..., min_p: _Optional[float] = ..., frequency_penalty: _Optional[float] = ..., presence_penalty: _Optional[float] = ..., repetition_penalty: _Optional[float] = ..., max_new_tokens: _Optional[int] = ..., stop: _Optional[_Iterable[str]] = ..., stop_token_ids: _Optional[_Iterable[int]] = ..., skip_special_tokens: bool = ..., spaces_between_special_tokens: bool = ..., regex: _Optional[str] = ..., json_schema: _Optional[str] = ..., ebnf_grammar: _Optional[str] = ..., structural_tag: _Optional[str] = ...,
lora_path: _Optional[str] = ..., n: _Optional[int] = ..., token_healing: bool
= ..., min_new_tokens: _Optional[int] = ..., ignore_eos: bool = ..., no_stop_trim: bool = ..., stream_interval: _Optional[int] = ..., logit_bias: _Optional[_Mapping[str, float]] = ..., custom_params: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ...) -> None: ...
def __init__(self, temperature: _Optional[float] = ..., top_p: _Optional[float] = ..., top_k: _Optional[int] = ..., min_p: _Optional[float] = ..., frequency_penalty: _Optional[float] = ..., presence_penalty: _Optional[float] = ..., repetition_penalty: _Optional[float] = ..., max_new_tokens: _Optional[int] = ..., stop: _Optional[_Iterable[str]] = ..., stop_token_ids: _Optional[_Iterable[int]] = ..., skip_special_tokens: bool = ..., spaces_between_special_tokens: bool = ..., regex: _Optional[str] = ..., json_schema: _Optional[str] = ..., ebnf_grammar: _Optional[str] = ..., structural_tag: _Optional[str] = ...,
n: _Optional[int]
= ..., min_new_tokens: _Optional[int] = ..., ignore_eos: bool = ..., no_stop_trim: bool = ..., stream_interval: _Optional[int] = ..., logit_bias: _Optional[_Mapping[str, float]] = ..., custom_params: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ...) -> None: ...
class DisaggregatedParams(_message.Message):
__slots__ = ("bootstrap_host", "bootstrap_port", "bootstrap_room")
...
...
sgl-router/src/grpc_client/sglang_scheduler.rs
View file @
a578d300
...
...
@@ -202,6 +202,7 @@ impl SglangSchedulerClient {
stop
:
stop_sequences
,
stop_token_ids
:
request
.stop_token_ids
.clone
()
.unwrap_or_default
(),
skip_special_tokens
,
spaces_between_special_tokens
:
true
,
// Default from Python SamplingParams
ignore_eos
:
request
.ignore_eos
,
no_stop_trim
:
request
.no_stop_trim
,
n
:
request
.n
.unwrap_or
(
1
)
as
i32
,
...
...
@@ -301,6 +302,8 @@ impl SglangSchedulerClient {
top_k
:
-
1
,
repetition_penalty
:
1.0
,
n
:
1
,
skip_special_tokens
:
true
,
spaces_between_special_tokens
:
true
,
..
Default
::
default
()
};
...
...
@@ -444,10 +447,24 @@ mod tests {
#[test]
fn
test_sampling_params_defaults
()
{
let
params
=
proto
::
SamplingParams
::
default
();
// Numeric fields have proto defaults (0)
assert_eq!
(
params
.temperature
,
0.0
);
assert_eq!
(
params
.max_new_tokens
,
None
);
assert_eq!
(
params
.top_p
,
0.0
);
assert_eq!
(
params
.top_k
,
0
);
assert_eq!
(
params
.repetition_penalty
,
0.0
);
assert_eq!
(
params
.n
,
0
);
// Bool fields have proto defaults (false)
assert
!
(
!
params
.skip_special_tokens
);
assert
!
(
!
params
.spaces_between_special_tokens
);
assert
!
(
!
params
.ignore_eos
);
assert
!
(
!
params
.no_stop_trim
);
// Optional int fields should be None
assert_eq!
(
params
.max_new_tokens
,
None
);
assert_eq!
(
params
.stream_interval
,
None
);
// Other non-optional fields
assert_eq!
(
params
.min_p
,
0.0
);
assert_eq!
(
params
.frequency_penalty
,
0.0
);
assert_eq!
(
params
.presence_penalty
,
0.0
);
assert
!
(
params
.stop
.is_empty
());
}
...
...
sgl-router/src/proto/sglang_scheduler.proto
View file @
a578d300
...
...
@@ -27,6 +27,11 @@ service SglangScheduler {
// =====================
// Sampling parameters matching SGLang's SamplingParams
//
// IMPORTANT: Do not use SamplingParams::default() directly!
// The proto3 defaults (0 for numeric fields) do NOT match the semantic defaults
// (temperature=1.0, top_p=1.0, top_k=-1, etc.). Always construct with explicit values
// or use the conversion functions in sglang_scheduler.rs / grpc_server.py.
message
SamplingParams
{
float
temperature
=
1
;
float
top_p
=
2
;
...
...
@@ -50,24 +55,18 @@ message SamplingParams {
string
structural_tag
=
16
;
}
// LoRA adapter
string
lora_path
=
17
;
// Speculative decoding
int32
n
=
18
;
// Number of samples
// Token healing
bool
token_healing
=
19
;
int32
n
=
17
;
// Number of samples
// Additional parameters
int32
min_new_tokens
=
20
;
bool
ignore_eos
=
2
1
;
bool
no_stop_trim
=
2
2
;
int32
stream_interval
=
2
3
;
map
<
string
,
float
>
logit_bias
=
2
4
;
int32
min_new_tokens
=
18
;
bool
ignore_eos
=
1
9
;
bool
no_stop_trim
=
2
0
;
optional
int32
stream_interval
=
2
1
;
map
<
string
,
float
>
logit_bias
=
2
2
;
// Custom parameters for extensibility
google.protobuf.Struct
custom_params
=
2
5
;
google.protobuf.Struct
custom_params
=
2
3
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment