Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
99963991
Commit
99963991
authored
Sep 03, 2025
by
zhuwenwen
Browse files
Merge branch 'v0.9.2-dev' of
http://10.16.6.30/dcutoolkit/deeplearing/vllm
into v0.9.2-dev
parents
a7668e46
6cc81877
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
4 deletions
+6
-4
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+6
-4
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
99963991
...
@@ -2088,8 +2088,9 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -2088,8 +2088,9 @@ class GPUModelRunner(LoRAModelRunnerMixin):
hidden_states
=
outputs
hidden_states
=
outputs
if
self
.
speculative_config
and
self
.
speculative_config
.
use_eagle
()
and
not
is_profile
:
if
self
.
speculative_config
and
self
.
speculative_config
.
use_eagle
()
and
not
is_profile
:
assert
isinstance
(
self
.
drafter
,
EagleProposer
)
#assert isinstance(self.drafter, EagleProposer)
self
.
drafter
.
dummy_run
(
num_tokens
,
attn_metadata
)
if
hasattr
(
self
,
'drafter'
)
and
isinstance
(
self
.
drafter
,
EagleProposer
):
self
.
drafter
.
dummy_run
(
num_tokens
,
attn_metadata
)
# This is necessary to avoid blocking DP.
# This is necessary to avoid blocking DP.
# For dummy runs, we typically skip EPLB since we don't have any real
# For dummy runs, we typically skip EPLB since we don't have any real
...
@@ -2677,10 +2678,11 @@ class GPUModelRunner(LoRAModelRunnerMixin):
...
@@ -2677,10 +2678,11 @@ class GPUModelRunner(LoRAModelRunnerMixin):
kv_caches
=
self
.
initialize_kv_cache_tensors
(
kv_cache_config
)
kv_caches
=
self
.
initialize_kv_cache_tensors
(
kv_cache_config
)
if
self
.
speculative_config
and
self
.
speculative_config
.
use_eagle
():
if
self
.
speculative_config
and
self
.
speculative_config
.
use_eagle
():
assert
isinstance
(
self
.
drafter
,
EagleProposer
)
#
assert isinstance(self.drafter, EagleProposer)
# validate all draft model layers belong to the same kv cache
# validate all draft model layers belong to the same kv cache
# group
# group
self
.
drafter
.
validate_same_kv_cache_group
(
kv_cache_config
)
if
hasattr
(
self
,
'drafter'
)
and
isinstance
(
self
.
drafter
,
EagleProposer
):
self
.
drafter
.
validate_same_kv_cache_group
(
kv_cache_config
)
if
has_kv_transfer_group
():
if
has_kv_transfer_group
():
get_kv_transfer_group
().
register_kv_caches
(
kv_caches
)
get_kv_transfer_group
().
register_kv_caches
(
kv_caches
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment