Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fc7980db
"examples/vscode:/vscode.git/clone" did not exist on "716892049190e55b432a0cb84d0250f0e68c310a"
Commit
fc7980db
authored
Feb 05, 2026
by
zhuwenwen
Browse files
Merge tag 'v0.15.1' into v0.15.1-ori
parents
3eab7fef
1892993b
Changes
62
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
1 deletion
+13
-1
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+1
-1
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+12
-0
No files found.
vllm/v1/core/sched/scheduler.py
View file @
fc7980db
...
@@ -1284,7 +1284,7 @@ class Scheduler(SchedulerInterface):
...
@@ -1284,7 +1284,7 @@ class Scheduler(SchedulerInterface):
scheduled_spec_token_ids
=
(
scheduled_spec_token_ids
=
(
scheduler_output
.
scheduled_spec_decode_tokens
.
get
(
req_id
)
scheduler_output
.
scheduled_spec_decode_tokens
.
get
(
req_id
)
)
)
if
scheduled_spec_token_ids
:
if
scheduled_spec_token_ids
and
generated_token_ids
:
num_draft_tokens
=
len
(
scheduled_spec_token_ids
)
num_draft_tokens
=
len
(
scheduled_spec_token_ids
)
num_accepted
=
len
(
generated_token_ids
)
-
1
num_accepted
=
len
(
generated_token_ids
)
-
1
num_rejected
=
num_draft_tokens
-
num_accepted
num_rejected
=
num_draft_tokens
-
num_accepted
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
fc7980db
...
@@ -1382,12 +1382,14 @@ class GPUModelRunner(
...
@@ -1382,12 +1382,14 @@ class GPUModelRunner(
num_scheduled_tokens
:
dict
[
str
,
int
],
num_scheduled_tokens
:
dict
[
str
,
int
],
kv_cache_spec
:
KVCacheSpec
,
kv_cache_spec
:
KVCacheSpec
,
num_reqs
:
int
,
num_reqs
:
int
,
for_cudagraph_capture
:
bool
=
False
,
)
->
tuple
[
torch
.
Tensor
|
None
,
np
.
ndarray
|
None
]:
)
->
tuple
[
torch
.
Tensor
|
None
,
np
.
ndarray
|
None
]:
if
not
isinstance
(
kv_cache_spec
,
CrossAttentionSpec
):
if
not
isinstance
(
kv_cache_spec
,
CrossAttentionSpec
):
return
None
,
None
return
None
,
None
# Zero out buffer for padding requests that are not actually scheduled (CGs)
# Zero out buffer for padding requests that are not actually scheduled (CGs)
self
.
encoder_seq_lens
.
np
[:
num_reqs
]
=
0
self
.
encoder_seq_lens
.
np
[:
num_reqs
]
=
0
# Build encoder_seq_lens array mapping request indices to
# Build encoder_seq_lens array mapping request indices to
# encoder lengths for inputs scheduled in this batch
# encoder lengths for inputs scheduled in this batch
for
req_id
in
num_scheduled_tokens
:
for
req_id
in
num_scheduled_tokens
:
...
@@ -1404,6 +1406,15 @@ class GPUModelRunner(
...
@@ -1404,6 +1406,15 @@ class GPUModelRunner(
feature
.
mm_position
.
length
for
feature
in
req_state
.
mm_features
feature
.
mm_position
.
length
for
feature
in
req_state
.
mm_features
)
)
self
.
encoder_seq_lens
.
np
[
req_index
]
=
encoder_input_tokens
self
.
encoder_seq_lens
.
np
[
req_index
]
=
encoder_input_tokens
if
for_cudagraph_capture
:
# During CUDA graph capture, we need to use realistic encoder lengths
# so that max_seqlen_k is captured with the correct value.
max_encoder_len
=
getattr
(
self
.
model_config
.
hf_config
,
"max_source_positions"
,
self
.
max_encoder_len
,
)
self
.
encoder_seq_lens
.
np
[:
num_reqs
]
=
max_encoder_len
self
.
encoder_seq_lens
.
copy_to_gpu
(
num_reqs
)
self
.
encoder_seq_lens
.
copy_to_gpu
(
num_reqs
)
encoder_seq_lens
=
self
.
encoder_seq_lens
.
gpu
[:
num_reqs
]
encoder_seq_lens
=
self
.
encoder_seq_lens
.
gpu
[:
num_reqs
]
...
@@ -1821,6 +1832,7 @@ class GPUModelRunner(
...
@@ -1821,6 +1832,7 @@ class GPUModelRunner(
num_scheduled_tokens
or
{},
num_scheduled_tokens
or
{},
kv_cache_group
.
kv_cache_spec
,
kv_cache_group
.
kv_cache_spec
,
num_reqs_padded
,
num_reqs_padded
,
for_cudagraph_capture
=
for_cudagraph_capture
,
)
)
if
kv_cache_gid
>
0
:
if
kv_cache_gid
>
0
:
cm
.
block_table_tensor
=
_get_block_table
(
kv_cache_gid
)
cm
.
block_table_tensor
=
_get_block_table
(
kv_cache_gid
)
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment