Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c645e9a2
Unverified
Commit
c645e9a2
authored
Feb 22, 2026
by
Woosuk Kwon
Committed by
GitHub
Feb 22, 2026
Browse files
[Model Runner V2] Remove propose_draft method (#35070)
Signed-off-by:
Woosuk Kwon
<
woosuk@inferact.ai
>
parent
944ffb59
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
5 additions
and
24 deletions
+5
-24
vllm/v1/worker/gpu/model_runner.py
vllm/v1/worker/gpu/model_runner.py
+5
-24
No files found.
vllm/v1/worker/gpu/model_runner.py
View file @
c645e9a2
...
@@ -858,29 +858,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -858,29 +858,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
computed_prefill
,
self
.
req_states
.
prefill_len
.
np
,
out
=
computed_prefill
computed_prefill
,
self
.
req_states
.
prefill_len
.
np
,
out
=
computed_prefill
)
)
@
torch
.
inference_mode
()
def
propose_draft
(
self
,
input_batch
:
InputBatch
,
last_hidden_states
:
torch
.
Tensor
,
aux_hidden_states
:
list
[
torch
.
Tensor
]
|
None
,
num_sampled
:
torch
.
Tensor
,
num_rejected
:
torch
.
Tensor
,
)
->
torch
.
Tensor
:
assert
self
.
speculator
is
not
None
draft_tokens
=
self
.
speculator
.
propose
(
input_batch
,
last_hidden_states
,
aux_hidden_states
,
num_sampled
,
num_rejected
,
self
.
req_states
.
last_sampled_tokens
,
self
.
req_states
.
next_prefill_tokens
,
self
.
sampler
.
sampling_states
.
temperature
.
gpu
,
self
.
sampler
.
sampling_states
.
seeds
.
gpu
,
)
return
draft_tokens
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
def
execute_model
(
def
execute_model
(
self
,
self
,
...
@@ -1113,12 +1090,16 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -1113,12 +1090,16 @@ class GPUModelRunner(LoRAModelRunnerMixin):
input_batch
,
sampler_output
.
sampled_token_ids
,
num_sampled
,
num_rejected
input_batch
,
sampler_output
.
sampled_token_ids
,
num_sampled
,
num_rejected
)
)
if
self
.
speculator
is
not
None
:
if
self
.
speculator
is
not
None
:
draft_tokens
=
self
.
propose
_draft
(
draft_tokens
=
self
.
speculator
.
propose
(
input_batch
,
input_batch
,
hidden_states
,
hidden_states
,
aux_hidden_states
,
aux_hidden_states
,
num_sampled
,
num_sampled
,
num_rejected
,
num_rejected
,
self
.
req_states
.
last_sampled_tokens
,
self
.
req_states
.
next_prefill_tokens
,
self
.
sampler
.
sampling_states
.
temperature
.
gpu
,
self
.
sampler
.
sampling_states
.
seeds
.
gpu
,
)
)
self
.
req_states
.
draft_tokens
[
input_batch
.
idx_mapping
]
=
draft_tokens
self
.
req_states
.
draft_tokens
[
input_batch
.
idx_mapping
]
=
draft_tokens
self
.
draft_tokens_handler
.
set_draft_tokens
(
input_batch
,
draft_tokens
)
self
.
draft_tokens_handler
.
set_draft_tokens
(
input_batch
,
draft_tokens
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment