Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
40b8363b
Unverified
Commit
40b8363b
authored
Mar 19, 2026
by
Woosuk Kwon
Committed by
GitHub
Mar 19, 2026
Browse files
[MRV2] Use fp32 for draft logits (#37526)
Signed-off-by:
Woosuk Kwon
<
woosuk@inferact.ai
>
parent
8b10e4fb
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
1 addition
and
3 deletions
+1
-3
vllm/v1/worker/gpu/model_runner.py
vllm/v1/worker/gpu/model_runner.py
+0
-1
vllm/v1/worker/gpu/states.py
vllm/v1/worker/gpu/states.py
+1
-2
No files found.
vllm/v1/worker/gpu/model_runner.py
View file @
40b8363b
...
@@ -195,7 +195,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -195,7 +195,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
num_speculative_steps
=
self
.
num_speculative_steps
,
num_speculative_steps
=
self
.
num_speculative_steps
,
vocab_size
=
self
.
vocab_size
,
vocab_size
=
self
.
vocab_size
,
device
=
self
.
device
,
device
=
self
.
device
,
model_dtype
=
self
.
dtype
,
cache_draft_logits
=
not
use_strict_rejection_sampling
,
cache_draft_logits
=
not
use_strict_rejection_sampling
,
)
)
self
.
input_buffers
=
InputBuffers
(
self
.
input_buffers
=
InputBuffers
(
...
...
vllm/v1/worker/gpu/states.py
View file @
40b8363b
...
@@ -15,7 +15,6 @@ class RequestState:
...
@@ -15,7 +15,6 @@ class RequestState:
num_speculative_steps
:
int
,
num_speculative_steps
:
int
,
vocab_size
:
int
,
vocab_size
:
int
,
device
:
torch
.
device
,
device
:
torch
.
device
,
model_dtype
:
torch
.
dtype
,
cache_draft_logits
:
bool
,
cache_draft_logits
:
bool
,
):
):
self
.
max_num_reqs
=
max_num_reqs
self
.
max_num_reqs
=
max_num_reqs
...
@@ -81,7 +80,7 @@ class RequestState:
...
@@ -81,7 +80,7 @@ class RequestState:
self
.
max_num_reqs
,
self
.
max_num_reqs
,
self
.
num_speculative_steps
,
self
.
num_speculative_steps
,
self
.
vocab_size
,
self
.
vocab_size
,
dtype
=
model_dtype
,
dtype
=
torch
.
float32
,
device
=
device
,
device
=
device
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment