change / sglang · Commits · 7dcd689b

Unverified commit 7dcd689b, authored Sep 25, 2025 by Chang Su, committed by GitHub on Sep 25, 2025.

[router][refactor] Clean up protobuf fields (#10923)

Parent: f7bab41a
Showing 5 changed files with 102 additions and 109 deletions (+102 / -109):

python/sglang/srt/entrypoints/grpc_server.py     +8  -7
python/sglang/srt/grpc/sglang_scheduler.proto    +14 -16
python/sglang/srt/grpc/sglang_scheduler_pb2.py   +54 -54
python/sglang/srt/grpc/sglang_scheduler_pb2.pyi  +12 -16
sgl-router/src/proto/sglang_scheduler.proto      +14 -16
python/sglang/srt/entrypoints/grpc_server.py
```diff
@@ -266,7 +266,6 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
                 prompt_tokens=result.get("prompt_tokens", 0),
                 cached_tokens=0,
                 embedding_dim=len(result["embedding"]),
-                generation_time=time.time() - self.start_time,
             ),
         )
@@ -477,16 +476,14 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
         self, request_id: str, output: Dict
     ) -> sglang_scheduler_pb2.GenerateResponse:
         """Create a streaming chunk response."""
+        meta_info = output.get("meta_info", {})
         return sglang_scheduler_pb2.GenerateResponse(
             request_id=request_id,
             chunk=sglang_scheduler_pb2.GenerateStreamChunk(
                 token_id=output["token_ids"][-1] if output.get("token_ids") else 0,
-                text=output.get("text", ""),
-                prompt_tokens=0,
-                completion_tokens=len(output.get("token_ids", [])),
+                prompt_tokens=meta_info.get("prompt_tokens", 0),
+                completion_tokens=meta_info.get("completion_tokens", 0),
                 cached_tokens=0,
-                generation_time=time.time() - self.start_time,
-                queue_time=0.0,
             ),
         )
@@ -507,8 +504,12 @@ class SGLangSchedulerServicer(sglang_scheduler_pb2_grpc.SglangSchedulerServicer)
             request_id=request_id,
             complete=sglang_scheduler_pb2.GenerateComplete(
                 output_ids=output.get("token_ids", []),
-                output_text=output.get("text", ""),
                 finish_reason=finish_reason,
+                prompt_tokens=meta_info.get("prompt_tokens", 0),
+                completion_tokens=meta_info.get(
+                    "completion_tokens", len(output.get("token_ids", []))
+                ),
+                cached_tokens=meta_info.get("cached_tokens", 0),
             ),
         )
```
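With this refactor, per-chunk usage counts come from the scheduler's `meta_info` instead of placeholder values, and the per-chunk `text`, `generation_time`, and `queue_time` fields are gone entirely. Below is a minimal consumer sketch; the stub class name `SglangSchedulerStub`, the server-streaming `Generate` RPC, and the bare `GenerateRequest` are illustrative assumptions, not names confirmed by this diff.

```python
# Hypothetical consumer of the refactored streaming API. Assumed names:
# SglangSchedulerStub, Generate, GenerateRequest (not shown in this commit).
import grpc

from sglang.srt.grpc import sglang_scheduler_pb2, sglang_scheduler_pb2_grpc


def stream_tokens(target: str = "localhost:50051") -> None:
    channel = grpc.insecure_channel(target)
    stub = sglang_scheduler_pb2_grpc.SglangSchedulerStub(channel)
    request = sglang_scheduler_pb2.GenerateRequest()  # fill in real request fields

    for response in stub.Generate(request):
        if response.HasField("chunk"):
            # Usage counts are now real values sourced from meta_info,
            # not the old hard-coded 0 / len(token_ids) placeholders.
            chunk = response.chunk
            print(chunk.token_id, chunk.prompt_tokens, chunk.completion_tokens)
        elif response.HasField("complete"):
            print("finish_reason:", response.complete.finish_reason)
            print("usage:", response.complete.prompt_tokens,
                  response.complete.completion_tokens,
                  response.complete.cached_tokens)
```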
python/sglang/srt/grpc/sglang_scheduler.proto
```diff
@@ -165,28 +165,22 @@ message GenerateResponse {
 message GenerateStreamChunk {
   // Generated token
   int32 token_id = 1;
-  string text = 2;
 
   // Cumulative counts
-  int32 prompt_tokens = 3;
-  int32 completion_tokens = 4;
-  int32 cached_tokens = 5;
+  int32 prompt_tokens = 2;
+  int32 completion_tokens = 3;
+  int32 cached_tokens = 4;
 
   // Logprobs (if requested)
-  LogProbs logprobs = 6;
+  LogProbs logprobs = 5;
 
   // Hidden states (if requested)
-  repeated float hidden_states = 7;
-
-  // Metadata
-  float generation_time = 8;  // Time to generate this token
-  int32 queue_time = 9;       // Time spent in queue
+  repeated float hidden_states = 6;
 }
 
 message GenerateComplete {
   // Final output
   repeated int32 output_ids = 1;
-  string output_text = 2;
 
   // Finish reason
   enum FinishReason {
@@ -201,13 +195,18 @@ message GenerateComplete {
     // The request was aborted by the user or system.
     ABORT = 4;
   }
-  FinishReason finish_reason = 3;
+  FinishReason finish_reason = 2;
+
+  // Token usage counts
+  int32 prompt_tokens = 3;
+  int32 completion_tokens = 4;
+  int32 cached_tokens = 5;
 
   // All logprobs if requested
-  repeated LogProbs all_logprobs = 11;
+  repeated LogProbs all_logprobs = 6;
 
   // All hidden states if requested
-  repeated HiddenStates all_hidden_states = 12;
+  repeated HiddenStates all_hidden_states = 7;
 }
 
 message GenerateError {
@@ -285,10 +284,9 @@ message EmbedComplete {
   // Additional metadata
   int32 embedding_dim = 4;
-  float generation_time = 5;
 
   // For batch embeddings
-  repeated Embedding batch_embeddings = 6;
+  repeated Embedding batch_embeddings = 5;
 }
 
 message Embedding {
```
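A caution worth spelling out: this change renumbers existing fields (`prompt_tokens` 3 → 2, `logprobs` 6 → 5, and so on), which changes the protobuf wire format. Stubs generated from the old and new .proto cannot interoperate, so every consumer must be regenerated from the same revision; this commit does exactly that for the checked-in `_pb2.py`/`_pb2.pyi`. Below is a regeneration sketch using grpcio-tools, assuming repo-root-relative paths (the project's actual codegen command is not shown in this commit); `--pyi_out` requires a reasonably recent grpcio-tools/protobuf.

```python
# Regenerate Python stubs after editing the .proto (illustrative command;
# the repo-root-relative paths are an assumption).
from grpc_tools import protoc

protoc.main([
    "grpc_tools.protoc",
    "-Ipython/sglang/srt/grpc",
    "--python_out=python/sglang/srt/grpc",
    "--pyi_out=python/sglang/srt/grpc",        # needs recent grpcio-tools
    "--grpc_python_out=python/sglang/srt/grpc",
    "sglang_scheduler.proto",
])
```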
python/sglang/srt/grpc/sglang_scheduler_pb2.py
(Generated file; diff collapsed.)
python/sglang/srt/grpc/sglang_scheduler_pb2.pyi
```diff
@@ -161,29 +161,23 @@ class GenerateResponse(_message.Message):
     def __init__(self, request_id: _Optional[str] = ..., chunk: _Optional[_Union[GenerateStreamChunk, _Mapping]] = ..., complete: _Optional[_Union[GenerateComplete, _Mapping]] = ..., error: _Optional[_Union[GenerateError, _Mapping]] = ...) -> None: ...
 
 class GenerateStreamChunk(_message.Message):
-    __slots__ = ("token_id", "text", "prompt_tokens", "completion_tokens", "cached_tokens", "logprobs", "hidden_states", "generation_time", "queue_time")
+    __slots__ = ("token_id", "prompt_tokens", "completion_tokens", "cached_tokens", "logprobs", "hidden_states")
     TOKEN_ID_FIELD_NUMBER: _ClassVar[int]
-    TEXT_FIELD_NUMBER: _ClassVar[int]
     PROMPT_TOKENS_FIELD_NUMBER: _ClassVar[int]
     COMPLETION_TOKENS_FIELD_NUMBER: _ClassVar[int]
     CACHED_TOKENS_FIELD_NUMBER: _ClassVar[int]
     LOGPROBS_FIELD_NUMBER: _ClassVar[int]
     HIDDEN_STATES_FIELD_NUMBER: _ClassVar[int]
-    GENERATION_TIME_FIELD_NUMBER: _ClassVar[int]
-    QUEUE_TIME_FIELD_NUMBER: _ClassVar[int]
     token_id: int
-    text: str
     prompt_tokens: int
     completion_tokens: int
     cached_tokens: int
     logprobs: LogProbs
     hidden_states: _containers.RepeatedScalarFieldContainer[float]
-    generation_time: float
-    queue_time: int
-    def __init__(self, token_id: _Optional[int] = ..., text: _Optional[str] = ..., prompt_tokens: _Optional[int] = ..., completion_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., logprobs: _Optional[_Union[LogProbs, _Mapping]] = ..., hidden_states: _Optional[_Iterable[float]] = ..., generation_time: _Optional[float] = ..., queue_time: _Optional[int] = ...) -> None: ...
+    def __init__(self, token_id: _Optional[int] = ..., prompt_tokens: _Optional[int] = ..., completion_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., logprobs: _Optional[_Union[LogProbs, _Mapping]] = ..., hidden_states: _Optional[_Iterable[float]] = ...) -> None: ...
 
 class GenerateComplete(_message.Message):
-    __slots__ = ("output_ids", "output_text", "finish_reason", "all_logprobs", "all_hidden_states")
+    __slots__ = ("output_ids", "finish_reason", "prompt_tokens", "completion_tokens", "cached_tokens", "all_logprobs", "all_hidden_states")
     class FinishReason(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
         __slots__ = ()
         STOP: _ClassVar[GenerateComplete.FinishReason]
@@ -197,16 +191,20 @@ class GenerateComplete(_message.Message):
     STOP_STR: GenerateComplete.FinishReason
     ABORT: GenerateComplete.FinishReason
     OUTPUT_IDS_FIELD_NUMBER: _ClassVar[int]
-    OUTPUT_TEXT_FIELD_NUMBER: _ClassVar[int]
     FINISH_REASON_FIELD_NUMBER: _ClassVar[int]
+    PROMPT_TOKENS_FIELD_NUMBER: _ClassVar[int]
+    COMPLETION_TOKENS_FIELD_NUMBER: _ClassVar[int]
+    CACHED_TOKENS_FIELD_NUMBER: _ClassVar[int]
     ALL_LOGPROBS_FIELD_NUMBER: _ClassVar[int]
     ALL_HIDDEN_STATES_FIELD_NUMBER: _ClassVar[int]
     output_ids: _containers.RepeatedScalarFieldContainer[int]
-    output_text: str
     finish_reason: GenerateComplete.FinishReason
+    prompt_tokens: int
+    completion_tokens: int
+    cached_tokens: int
     all_logprobs: _containers.RepeatedCompositeFieldContainer[LogProbs]
     all_hidden_states: _containers.RepeatedCompositeFieldContainer[HiddenStates]
-    def __init__(self, output_ids: _Optional[_Iterable[int]] = ..., output_text: _Optional[str] = ..., finish_reason: _Optional[_Union[GenerateComplete.FinishReason, str]] = ..., all_logprobs: _Optional[_Iterable[_Union[LogProbs, _Mapping]]] = ..., all_hidden_states: _Optional[_Iterable[_Union[HiddenStates, _Mapping]]] = ...) -> None: ...
+    def __init__(self, output_ids: _Optional[_Iterable[int]] = ..., finish_reason: _Optional[_Union[GenerateComplete.FinishReason, str]] = ..., prompt_tokens: _Optional[int] = ..., completion_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., all_logprobs: _Optional[_Iterable[_Union[LogProbs, _Mapping]]] = ..., all_hidden_states: _Optional[_Iterable[_Union[HiddenStates, _Mapping]]] = ...) -> None: ...
 
 class GenerateError(_message.Message):
     __slots__ = ("message", "http_status_code", "details")
@@ -283,20 +281,18 @@ class EmbedResponse(_message.Message):
     def __init__(self, request_id: _Optional[str] = ..., complete: _Optional[_Union[EmbedComplete, _Mapping]] = ..., error: _Optional[_Union[EmbedError, _Mapping]] = ...) -> None: ...
 
 class EmbedComplete(_message.Message):
-    __slots__ = ("embedding", "prompt_tokens", "cached_tokens", "embedding_dim", "generation_time", "batch_embeddings")
+    __slots__ = ("embedding", "prompt_tokens", "cached_tokens", "embedding_dim", "batch_embeddings")
     EMBEDDING_FIELD_NUMBER: _ClassVar[int]
     PROMPT_TOKENS_FIELD_NUMBER: _ClassVar[int]
     CACHED_TOKENS_FIELD_NUMBER: _ClassVar[int]
     EMBEDDING_DIM_FIELD_NUMBER: _ClassVar[int]
-    GENERATION_TIME_FIELD_NUMBER: _ClassVar[int]
     BATCH_EMBEDDINGS_FIELD_NUMBER: _ClassVar[int]
     embedding: _containers.RepeatedScalarFieldContainer[float]
     prompt_tokens: int
     cached_tokens: int
     embedding_dim: int
-    generation_time: float
     batch_embeddings: _containers.RepeatedCompositeFieldContainer[Embedding]
-    def __init__(self, embedding: _Optional[_Iterable[float]] = ..., prompt_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., embedding_dim: _Optional[int] = ..., generation_time: _Optional[float] = ..., batch_embeddings: _Optional[_Iterable[_Union[Embedding, _Mapping]]] = ...) -> None: ...
+    def __init__(self, embedding: _Optional[_Iterable[float]] = ..., prompt_tokens: _Optional[int] = ..., cached_tokens: _Optional[int] = ..., embedding_dim: _Optional[int] = ..., batch_embeddings: _Optional[_Iterable[_Union[Embedding, _Mapping]]] = ...) -> None: ...
 
 class Embedding(_message.Message):
     __slots__ = ("values", "index")
```
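The new `__init__` signatures above show the shape change directly; constructing the messages in Python makes it concrete:

```python
# Building the refactored messages per the new stub signatures.
from sglang.srt.grpc import sglang_scheduler_pb2 as pb2

chunk = pb2.GenerateStreamChunk(
    token_id=42,
    prompt_tokens=128,       # now populated from meta_info by the server
    completion_tokens=7,
    cached_tokens=0,
)

complete = pb2.GenerateComplete(
    output_ids=[42, 43, 44],
    finish_reason=pb2.GenerateComplete.STOP,
    prompt_tokens=128,
    completion_tokens=3,
    cached_tokens=0,
)

# Removed fields now fail fast: passing text= to GenerateStreamChunk or
# output_text= to GenerateComplete raises ValueError at construction time.
```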
sgl-router/src/proto/sglang_scheduler.proto
```diff
@@ -165,28 +165,22 @@ message GenerateResponse {
 message GenerateStreamChunk {
   // Generated token
   int32 token_id = 1;
-  string text = 2;
 
   // Cumulative counts
-  int32 prompt_tokens = 3;
-  int32 completion_tokens = 4;
-  int32 cached_tokens = 5;
+  int32 prompt_tokens = 2;
+  int32 completion_tokens = 3;
+  int32 cached_tokens = 4;
 
   // Logprobs (if requested)
-  LogProbs logprobs = 6;
+  LogProbs logprobs = 5;
 
   // Hidden states (if requested)
-  repeated float hidden_states = 7;
-
-  // Metadata
-  float generation_time = 8;  // Time to generate this token
-  int32 queue_time = 9;       // Time spent in queue
+  repeated float hidden_states = 6;
 }
 
 message GenerateComplete {
   // Final output
   repeated int32 output_ids = 1;
-  string output_text = 2;
 
   // Finish reason
   enum FinishReason {
@@ -201,13 +195,18 @@ message GenerateComplete {
     // The request was aborted by the user or system.
     ABORT = 4;
   }
-  FinishReason finish_reason = 3;
+  FinishReason finish_reason = 2;
+
+  // Token usage counts
+  int32 prompt_tokens = 3;
+  int32 completion_tokens = 4;
+  int32 cached_tokens = 5;
 
   // All logprobs if requested
-  repeated LogProbs all_logprobs = 11;
+  repeated LogProbs all_logprobs = 6;
 
   // All hidden states if requested
-  repeated HiddenStates all_hidden_states = 12;
+  repeated HiddenStates all_hidden_states = 7;
 }
 
 message GenerateError {
@@ -285,10 +284,9 @@ message EmbedComplete {
   // Additional metadata
   int32 embedding_dim = 4;
-  float generation_time = 5;
 
   // For batch embeddings
-  repeated Embedding batch_embeddings = 6;
+  repeated Embedding batch_embeddings = 5;
 }
 
 message Embedding {
```
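The router keeps its own copy of the schema, and this commit applies an identical diff to both copies. If the two files are meant to stay byte-for-byte identical (an assumption; nothing in this commit enforces it), a small test could catch drift:

```python
# Illustrative drift check between the two proto copies; assumes the test
# runs from the repository root. Not part of this commit.
from pathlib import Path

PY_PROTO = Path("python/sglang/srt/grpc/sglang_scheduler.proto")
ROUTER_PROTO = Path("sgl-router/src/proto/sglang_scheduler.proto")


def test_proto_copies_in_sync() -> None:
    assert PY_PROTO.read_text() == ROUTER_PROTO.read_text(), (
        "sglang_scheduler.proto copies have drifted; re-sync both files"
    )
```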