Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2e66885c
"vllm/vscode:/vscode.git/clone" did not exist on "d3af8c18317c0dc008d42e4367fbb9045cfb7bf6"
Commit
2e66885c
authored
Aug 18, 2025
by
lizhigong
Browse files
fix issue from merge
parent
787c3715
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
5 additions
and
12 deletions
+5
-12
vllm/zero_overhead/v1/gpu_model_runner.py
vllm/zero_overhead/v1/gpu_model_runner.py
+5
-12
No files found.
vllm/zero_overhead/v1/gpu_model_runner.py
View file @
2e66885c
...
@@ -348,7 +348,6 @@ class V1ZeroModelRunner(GPUModelRunner):
...
@@ -348,7 +348,6 @@ class V1ZeroModelRunner(GPUModelRunner):
else
:
else
:
block_table
=
None
block_table
=
None
num_rejected_tokens
=
None
if
spec_decode_metadata
is
None
:
if
spec_decode_metadata
is
None
:
# input_ids can be None for multimodal models.
# input_ids can be None for multimodal models.
target_token_ids
=
self
.
input_ids
[:
num_scheduled_tokens
]
target_token_ids
=
self
.
input_ids
[:
num_scheduled_tokens
]
...
@@ -364,21 +363,15 @@ class V1ZeroModelRunner(GPUModelRunner):
...
@@ -364,21 +363,15 @@ class V1ZeroModelRunner(GPUModelRunner):
cu_num_tokens
=
eagle_attn_metadata
.
query_start_loc
cu_num_tokens
=
eagle_attn_metadata
.
query_start_loc
else
:
else
:
# TODO(woosuk): Refactor this.
# TODO(woosuk): Refactor this.
num_draft_tokens
=
spec_decode_metadata
.
num_draft_tokens
num_accepted_tokens
=
[
len
(
s
)
-
1
for
s
in
sampled_token_ids
]
num_rejected_tokens
=
[
num_accepted_tokens_tensor
=
async_tensor_h2d
(
n
+
1
-
len
(
sampled_token_ids
[
i
])
if
n
>
0
else
0
num_accepted_tokens
,
for
i
,
n
in
enumerate
(
num_draft_tokens
)
]
num_rejected_tokens_tensor
=
async_tensor_h2d
(
num_rejected_tokens
,
dtype
=
torch
.
int32
,
dtype
=
torch
.
int32
,
target_device
=
self
.
device
,
target_device
=
self
.
device
,
pin_memory
=
True
)
pin_memory
=
True
)
num_tokens
=
num_scheduled_tokens
-
sum
(
num_rejected_tokens
)
cu_num_tokens
,
token_indices
=
self
.
drafter
.
prepare_inputs
(
cu_num_tokens
,
token_indices
=
self
.
drafter
.
prepare_inputs
(
eagle_attn_metadata
.
query_start_loc
,
eagle_attn_metadata
.
query_start_loc
,
num_rejected_tokens_tensor
,
num_accepted_tokens_tensor
,
num_tokens
,
)
)
target_token_ids
=
self
.
input_ids
[
token_indices
]
target_token_ids
=
self
.
input_ids
[
token_indices
]
# TODO(woosuk): Support M-RoPE.
# TODO(woosuk): Support M-RoPE.
...
@@ -399,7 +392,7 @@ class V1ZeroModelRunner(GPUModelRunner):
...
@@ -399,7 +392,7 @@ class V1ZeroModelRunner(GPUModelRunner):
cu_num_tokens
=
cu_num_tokens
,
cu_num_tokens
=
cu_num_tokens
,
block_table
=
block_table
,
block_table
=
block_table
,
sampling_metadata
=
sampling_metadata
,
sampling_metadata
=
sampling_metadata
,
num_rejected_tokens
=
num_rejected_tokens
decoding
=
spec_decode_metadata
is
not
None
)
)
spec_token_ids
=
np
.
ones
(
draft_token_ids
.
shape
,
dtype
=
int
).
tolist
()
spec_token_ids
=
np
.
ones
(
draft_token_ids
.
shape
,
dtype
=
int
).
tolist
()
self
.
last_draft_token_ids
=
draft_token_ids
self
.
last_draft_token_ids
=
draft_token_ids
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment