Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0aeb698b
Unverified
Commit
0aeb698b
authored
Nov 26, 2025
by
Woosuk Kwon
Committed by
GitHub
Nov 26, 2025
Browse files
[Model Runner V2] Minor code cleanup (#29570)
Signed-off-by:
Woosuk Kwon
<
woosuk.kwon@berkeley.edu
>
parent
9bb33c89
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
18 additions
and
18 deletions
+18
-18
vllm/v1/worker/gpu/cudagraph_utils.py
vllm/v1/worker/gpu/cudagraph_utils.py
+2
-9
vllm/v1/worker/gpu/dp_utils.py
vllm/v1/worker/gpu/dp_utils.py
+9
-0
vllm/v1/worker/gpu/model_runner.py
vllm/v1/worker/gpu/model_runner.py
+7
-9
No files found.
vllm/v1/worker/gpu/cudagraph_utils.py
View file @
0aeb698b
...
...
@@ -16,6 +16,7 @@ from vllm.v1.core.sched.output import SchedulerOutput
from
vllm.v1.kv_cache_interface
import
KVCacheConfig
from
vllm.v1.worker.gpu.attn_utils
import
build_attn_metadata
from
vllm.v1.worker.gpu.block_table
import
BlockTables
from
vllm.v1.worker.gpu.dp_utils
import
make_num_tokens_across_dp
from
vllm.v1.worker.gpu.input_batch
import
InputBuffers
...
...
@@ -127,15 +128,7 @@ class CudaGraphManager:
slot_mappings
=
slot_mappings
,
kv_cache_config
=
kv_cache_config
,
)
if
self
.
dp_size
>
1
:
num_tokens_across_dp
=
torch
.
full
(
(
self
.
dp_size
,),
batch_size
,
dtype
=
torch
.
int32
,
device
=
"cpu"
,
)
else
:
num_tokens_across_dp
=
None
num_tokens_across_dp
=
make_num_tokens_across_dp
(
self
.
dp_size
,
batch_size
)
# Warm up.
with
set_forward_context
(
...
...
vllm/v1/worker/gpu/dp_utils.py
View file @
0aeb698b
...
...
@@ -20,3 +20,12 @@ def get_batch_metadata_across_dp(
tensor
[
1
][
dp_rank
]
=
cudagraph_size
dist
.
all_reduce
(
tensor
,
group
=
group
)
return
tensor
[
0
],
tensor
[
1
]
def
make_num_tokens_across_dp
(
dp_size
:
int
,
num_tokens
:
int
,
)
->
torch
.
Tensor
|
None
:
if
dp_size
==
1
:
return
None
return
torch
.
full
((
dp_size
,),
num_tokens
,
dtype
=
torch
.
int32
,
device
=
"cpu"
)
vllm/v1/worker/gpu/model_runner.py
View file @
0aeb698b
...
...
@@ -35,7 +35,10 @@ from vllm.v1.worker.gpu.attn_utils import (
)
from
vllm.v1.worker.gpu.block_table
import
BlockTables
from
vllm.v1.worker.gpu.cudagraph_utils
import
CudaGraphManager
from
vllm.v1.worker.gpu.dp_utils
import
get_batch_metadata_across_dp
from
vllm.v1.worker.gpu.dp_utils
import
(
get_batch_metadata_across_dp
,
make_num_tokens_across_dp
,
)
from
vllm.v1.worker.gpu.input_batch
import
(
InputBatch
,
InputBuffers
,
...
...
@@ -255,12 +258,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
if
not
skip_attn
:
self
.
prepare_dummy_attn_metadata
(
input_batch
)
if
self
.
dp_size
==
1
:
num_tokens_across_dp
:
torch
.
Tensor
|
None
=
None
else
:
num_tokens_across_dp
=
torch
.
full
(
(
self
.
dp_size
,),
num_tokens
,
dtype
=
torch
.
int32
,
device
=
"cpu"
)
num_tokens_across_dp
=
make_num_tokens_across_dp
(
self
.
dp_size
,
num_tokens
)
num_sampled_tokens
=
np
.
ones
(
input_batch
.
num_reqs
,
dtype
=
np
.
int32
)
with
(
self
.
maybe_dummy_run_with_lora
(
...
...
@@ -816,7 +814,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
self
.
req_states
.
last_sampled_tokens
,
next_prefill_tokens
,
)
self
.
req_states
.
draft_tokens
[
input_batch
.
idx_mapping
]
=
draft_tokens
return
draft_tokens
def
get_cudagraph_and_dp_padding
(
...
...
@@ -1006,7 +1003,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
input_batch
,
sampler_output
.
sampled_token_ids
,
num_sampled
,
num_rejected
)
if
self
.
do_spec_decode
:
_
=
self
.
propose_draft
(
draft_tokens
=
self
.
propose_draft
(
input_batch
,
sampling_metadata
,
hidden_states
,
...
...
@@ -1014,6 +1011,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
num_sampled
,
num_rejected
,
)
self
.
req_states
.
draft_tokens
[
input_batch
.
idx_mapping
]
=
draft_tokens
if
self
.
use_async_scheduling
:
return
async_output
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment