Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bc14663e
Unverified
Commit
bc14663e
authored
Jan 22, 2026
by
Nick Hill
Committed by
GitHub
Jan 22, 2026
Browse files
[Cleanup] Move scheduler `get_routed_experts` logic to separate method (#32706)
Signed-off-by:
Nick Hill
<
nickhill123@gmail.com
>
parent
654a71fc
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
23 deletions
+31
-23
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+31
-23
No files found.
vllm/v1/core/sched/scheduler.py
View file @
bc14663e
...
@@ -1197,28 +1197,7 @@ class Scheduler(SchedulerInterface):
...
@@ -1197,28 +1197,7 @@ class Scheduler(SchedulerInterface):
routed_experts
=
None
routed_experts
=
None
if
stopped
:
if
stopped
:
if
self
.
vllm_config
.
model_config
.
enable_return_routed_experts
:
routed_experts
=
self
.
_get_routed_experts
(
request
)
kv_blocks
=
self
.
kv_cache_manager
.
get_blocks
(
request
.
request_id
)
block_ids
=
kv_blocks
.
get_block_ids
()[
0
]
num_tokens
=
request
.
num_tokens
-
1
# compute slot mapping
block_ids_array
=
np
.
array
(
block_ids
,
dtype
=
np
.
int32
)
num_blocks
=
len
(
block_ids
)
block_size
=
self
.
block_size
# generate block offsets
block_offsets
=
np
.
arange
(
0
,
block_size
)
# compute slot mapping: slot = block_id * block_size + offset
slot_mapping
=
(
block_offsets
.
reshape
((
1
,
block_size
))
+
block_ids_array
.
reshape
((
num_blocks
,
1
))
*
block_size
).
flatten
()[:
num_tokens
]
routed_experts
=
self
.
routed_experts_reader
.
get_routed_experts
(
indices
=
slot_mapping
)
kv_transfer_params
=
self
.
_free_request
(
request
)
kv_transfer_params
=
self
.
_free_request
(
request
)
if
status_before_stop
==
RequestStatus
.
RUNNING
:
if
status_before_stop
==
RequestStatus
.
RUNNING
:
stopped_running_reqs
.
add
(
request
)
stopped_running_reqs
.
add
(
request
)
...
@@ -1250,7 +1229,12 @@ class Scheduler(SchedulerInterface):
...
@@ -1250,7 +1229,12 @@ class Scheduler(SchedulerInterface):
# Get prompt logprobs for this request.
# Get prompt logprobs for this request.
prompt_logprobs_tensors
=
prompt_logprobs_dict
.
get
(
req_id
)
prompt_logprobs_tensors
=
prompt_logprobs_dict
.
get
(
req_id
)
if
new_token_ids
or
pooler_output
is
not
None
or
kv_transfer_params
:
if
(
new_token_ids
or
pooler_output
is
not
None
or
kv_transfer_params
or
stopped
):
# Add EngineCoreOutput for this Request.
# Add EngineCoreOutput for this Request.
outputs
[
request
.
client_index
].
append
(
outputs
[
request
.
client_index
].
append
(
EngineCoreOutput
(
EngineCoreOutput
(
...
@@ -1351,6 +1335,30 @@ class Scheduler(SchedulerInterface):
...
@@ -1351,6 +1335,30 @@ class Scheduler(SchedulerInterface):
return
engine_core_outputs
return
engine_core_outputs
def
_get_routed_experts
(
self
,
request
:
Request
)
->
np
.
ndarray
|
None
:
if
not
self
.
vllm_config
.
model_config
.
enable_return_routed_experts
:
return
None
kv_blocks
=
self
.
kv_cache_manager
.
get_blocks
(
request
.
request_id
)
block_ids
=
kv_blocks
.
get_block_ids
()[
0
]
num_tokens
=
request
.
num_tokens
-
1
# compute slot mapping
block_ids_array
=
np
.
array
(
block_ids
,
dtype
=
np
.
int32
)
num_blocks
=
len
(
block_ids
)
block_size
=
self
.
block_size
# generate block offsets
block_offsets
=
np
.
arange
(
0
,
block_size
)
# compute slot mapping: slot = block_id * block_size + offset
slot_mapping
=
(
block_offsets
.
reshape
((
1
,
block_size
))
+
block_ids_array
.
reshape
((
num_blocks
,
1
))
*
block_size
).
flatten
()[:
num_tokens
]
return
self
.
routed_experts_reader
.
get_routed_experts
(
indices
=
slot_mapping
)
def
_update_request_with_output
(
def
_update_request_with_output
(
self
,
request
:
Request
,
new_token_ids
:
list
[
int
]
self
,
request
:
Request
,
new_token_ids
:
list
[
int
]
)
->
tuple
[
list
[
int
],
bool
]:
)
->
tuple
[
list
[
int
],
bool
]:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment