Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1a0a04da
Unverified
Commit 1a0a04da authored Sep 19, 2025 by Chen Ding
Committed by GitHub Sep 19, 2025
Browse files
[Perf] Optimize memory peak during EAGLE model loading. (#24585)
Signed-off-by: Chen Ding <candy.dc@alibaba-inc.com>
parent
6d8246aa
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 26 additions and 26 deletions
+26
-26
vllm/model_executor/models/deepseek_eagle.py
vllm/model_executor/models/deepseek_eagle.py
+8
-7
vllm/model_executor/models/llama4_eagle.py
vllm/model_executor/models/llama4_eagle.py
+10
-12
vllm/model_executor/models/llama_eagle.py
vllm/model_executor/models/llama_eagle.py
+8
-7
No files found.
vllm/model_executor/models/deepseek_eagle.py
View file @
1a0a04da
...
...
@@ -229,14 +229,15 @@ class EagleDeepseekV3ForCausalLM(DeepseekV3ForCausalLM):
return
logits
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]]):
def
transform
(
inputs
):
name
,
loaded_weight
=
inputs
if
"lm_head"
not
in
name
:
name
=
"model."
+
name
return
name
,
loaded_weight
loader
=
AutoWeightsLoader
(
self
,
skip_prefixes
=
None
,
)
model_weights
=
{}
for
name
,
loaded_weight
in
weights
:
if
"lm_head"
not
in
name
:
name
=
"model."
+
name
model_weights
[
name
]
=
loaded_weight
loader
.
load_weights
(
model_weights
.
items
())
loader
.
load_weights
(
map
(
transform
,
weights
))
vllm/model_executor/models/llama4_eagle.py
View file @
1a0a04da
...
...
@@ -205,23 +205,21 @@ class EagleLlama4ForCausalLM(Llama4ForCausalLM):
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]])
->
None
:
def
transform
(
inputs
):
name
,
loaded_weight
=
inputs
name
,
weight
=
self
.
permute_qk_weight_for_rotary
(
name
,
loaded_weight
)
if
"lm_head"
not
in
name
:
name
=
"model."
+
name
return
name
,
weight
loader
=
AutoWeightsLoader
(
self
,
# lm_head is tied with target model (Llama4ForCausalLM)
skip_prefixes
=
([
"lm_head."
]),
)
model_weights
=
{}
weights
=
[
self
.
permute_qk_weight_for_rotary
(
name
,
loaded_weight
)
for
name
,
loaded_weight
in
weights
]
for
name
,
loaded_weight
in
weights
:
if
"lm_head"
not
in
name
:
name
=
"model."
+
name
model_weights
[
name
]
=
loaded_weight
loader
.
load_weights
(
model_weights
.
items
())
loader
.
load_weights
(
map
(
transform
,
weights
))
def
get_input_embeddings
(
self
,
...
...
vllm/model_executor/models/llama_eagle.py
View file @
1a0a04da
...
...
@@ -158,14 +158,15 @@ class EagleLlamaForCausalLM(LlamaForCausalLM):
return
self
.
model
(
input_ids
,
positions
,
hidden_states
)
def
load_weights
(
self
,
weights
:
Iterable
[
tuple
[
str
,
torch
.
Tensor
]]):
def
transform
(
inputs
):
name
,
loaded_weight
=
inputs
if
"lm_head"
not
in
name
:
name
=
"model."
+
name
return
name
,
loaded_weight
loader
=
AutoWeightsLoader
(
self
,
skip_prefixes
=
None
,
)
model_weights
=
{}
for
name
,
loaded_weight
in
weights
:
if
"lm_head"
not
in
name
:
name
=
"model."
+
name
model_weights
[
name
]
=
loaded_weight
loader
.
load_weights
(
model_weights
.
items
())
loader
.
load_weights
(
map
(
transform
,
weights
))
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment