Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f5d412ba
Unverified
Commit f5d412ba authored Aug 16, 2025 by Thomas Parnell
Committed by GitHub Aug 15, 2025
Browse files
[BugFix] Fix regression caused by mamba state dtype PR (#22998)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
parent
177e55e3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 12 additions and 4 deletions
+12
-4
vllm/model_executor/models/phi4flash.py
vllm/model_executor/models/phi4flash.py
+6
-2
vllm/model_executor/models/plamo2.py
vllm/model_executor/models/plamo2.py
+6
-2
No files found.
vllm/model_executor/models/phi4flash.py
View file @
f5d412ba
...
@@ -650,8 +650,12 @@ class Phi4FlashForCausalLM(nn.Module, HasInnerState, IsHybrid, SupportsV0Only):
...
@@ -650,8 +650,12 @@ class Phi4FlashForCausalLM(nn.Module, HasInnerState, IsHybrid, SupportsV0Only):
 num_mamba_layers = self.config.num_hidden_layers \
     // 2 // self.config.mb_per_layer + 1
 self.mamba_cache = MambaCacheManager(
-    self.vllm_config, self.lm_head.weight.dtype, num_mamba_layers,
-    *self._get_mamba_cache_shape())
+    self.vllm_config,
+    num_mamba_layers,
+    *self._get_mamba_cache_shape(),
+    self.lm_head.weight.dtype,
+    self.lm_head.weight.dtype,
+)
 mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs)
 attn_metadata = get_forward_context().attn_metadata
...
...
vllm/model_executor/models/plamo2.py
View file @
f5d412ba
...
@@ -767,8 +767,12 @@ class Plamo2ForCausalLM(Plamo2PreTrainedModel, HasInnerState, SupportsPP,
...
@@ -767,8 +767,12 @@ class Plamo2ForCausalLM(Plamo2PreTrainedModel, HasInnerState, SupportsPP,
     self.vllm_config.parallel_config, LayerBlockType.mamba)
 self.mamba_cache = MambaCacheManager(
-    self.vllm_config, self.lm_head.weight.dtype, num_mamba_layers,
-    *self._get_mamba_cache_shape())
+    self.vllm_config,
+    num_mamba_layers,
+    *self._get_mamba_cache_shape(),
+    self.lm_head.weight.dtype,
+    self.lm_head.weight.dtype,
+)
 mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment