Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
eea3024f
Unverified
Commit
eea3024f
authored
Feb 12, 2026
by
Roger Wang
Committed by
GitHub
Feb 12, 2026
Browse files
[Bugfix] Fix mamba state dtype setting for Qwen3-Next and Qwen3.5 (#34489)
Signed-off-by:
Roger Wang
<
hey@rogerw.io
>
parent
2f308214
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
42 additions
and
6 deletions
+42
-6
vllm/model_executor/layers/mamba/mamba_utils.py
vllm/model_executor/layers/mamba/mamba_utils.py
+4
-2
vllm/model_executor/models/config.py
vllm/model_executor/models/config.py
+29
-0
vllm/model_executor/models/qwen3_5.py
vllm/model_executor/models/qwen3_5.py
+3
-2
vllm/model_executor/models/qwen3_next.py
vllm/model_executor/models/qwen3_next.py
+6
-2
No files found.
vllm/model_executor/layers/mamba/mamba_utils.py
View file @
eea3024f
...
@@ -80,9 +80,11 @@ class MambaStateDtypeCalculator:
...
@@ -80,9 +80,11 @@ class MambaStateDtypeCalculator:
cls
,
cls
,
model_dtype
:
ModelDType
|
torch
.
dtype
,
model_dtype
:
ModelDType
|
torch
.
dtype
,
mamba_cache_dtype
:
MambaDType
,
mamba_cache_dtype
:
MambaDType
,
mamba_ssm_cache_dtype
:
MambaDType
=
"auto"
,
)
->
tuple
[
torch
.
dtype
,
torch
.
dtype
]:
)
->
tuple
[
torch
.
dtype
,
torch
.
dtype
]:
state_dtype
=
get_kv_cache_torch_dtype
(
mamba_cache_dtype
,
model_dtype
)
return
cls
.
_mamba_state_dtype
(
return
(
state_dtype
,
state_dtype
)
model_dtype
,
mamba_cache_dtype
,
mamba_ssm_cache_dtype
)
@
classmethod
@
classmethod
def
kda_state_dtype
(
def
kda_state_dtype
(
...
...
vllm/model_executor/models/config.py
View file @
eea3024f
...
@@ -582,6 +582,33 @@ class NemotronHForCausalLMConfig(VerifyAndUpdateConfig):
...
@@ -582,6 +582,33 @@ class NemotronHForCausalLMConfig(VerifyAndUpdateConfig):
cache_config
.
mamba_ssm_cache_dtype
=
mamba_ssm_cache_dtype
cache_config
.
mamba_ssm_cache_dtype
=
mamba_ssm_cache_dtype
class Qwen3_5ForConditionalGenerationConfig(VerifyAndUpdateConfig):
    """Reconcile the cache config's mamba SSM dtype with the HF text config."""

    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        """Align ``cache_config.mamba_ssm_cache_dtype`` with the HF config.

        Reads ``mamba_ssm_dtype`` from ``model_config.hf_text_config`` (``None``
        when the attribute is absent) and then:

        * if the cache setting is ``"auto"``, replaces it with the HF value;
        * if the cache setting is anything else and differs from the HF value,
          leaves it untouched and logs a warning about the mismatch;
        * if the HF config carries no ``mamba_ssm_dtype``, does nothing.

        Args:
            vllm_config: Configuration object whose ``cache_config`` may be
                mutated in place.
        """
        cache_cfg = vllm_config.cache_config
        text_cfg = vllm_config.model_config.hf_text_config
        hf_dtype = getattr(text_cfg, "mamba_ssm_dtype", None)
        requested = cache_cfg.mamba_ssm_cache_dtype

        # Without a dtype in the HF config there is nothing to adopt or compare.
        if hf_dtype is None:
            return

        # "auto" defers to whatever the HF config declares.
        if requested == "auto":
            cache_cfg.mamba_ssm_cache_dtype = hf_dtype
            return

        # A non-"auto" setting is kept as-is; only flag it when it disagrees.
        if requested != hf_dtype:
            logger.warning(
                "Qwen3.5 model specifies mamba_ssm_dtype='%s' in its config, "
                "but --mamba-ssm-cache-dtype='%s' was passed. "
                "Using the user-specified value.",
                hf_dtype,
                requested,
            )
class
VoyageQwen3BidirectionalEmbedModelConfig
(
VerifyAndUpdateConfig
):
class
VoyageQwen3BidirectionalEmbedModelConfig
(
VerifyAndUpdateConfig
):
@
staticmethod
@
staticmethod
def
verify_and_update_model_config
(
model_config
:
"ModelConfig"
)
->
None
:
def
verify_and_update_model_config
(
model_config
:
"ModelConfig"
)
->
None
:
...
@@ -611,5 +638,7 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
...
@@ -611,5 +638,7 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
"DeepseekV32ForCausalLM"
:
DeepseekV32ForCausalLM
,
"DeepseekV32ForCausalLM"
:
DeepseekV32ForCausalLM
,
"NemotronHForCausalLM"
:
NemotronHForCausalLMConfig
,
"NemotronHForCausalLM"
:
NemotronHForCausalLMConfig
,
"NemotronHPuzzleForCausalLM"
:
NemotronHForCausalLMConfig
,
"NemotronHPuzzleForCausalLM"
:
NemotronHForCausalLMConfig
,
"Qwen3_5ForConditionalGeneration"
:
Qwen3_5ForConditionalGenerationConfig
,
"Qwen3_5MoeForConditionalGeneration"
:
Qwen3_5ForConditionalGenerationConfig
,
"VoyageQwen3BidirectionalEmbedModel"
:
VoyageQwen3BidirectionalEmbedModelConfig
,
"VoyageQwen3BidirectionalEmbedModel"
:
VoyageQwen3BidirectionalEmbedModelConfig
,
}
}
vllm/model_executor/models/qwen3_5.py
View file @
eea3024f
...
@@ -870,9 +870,10 @@ class Qwen3_5ForConditionalGeneration(Qwen3VLForConditionalGeneration, IsHybrid)
...
@@ -870,9 +870,10 @@ class Qwen3_5ForConditionalGeneration(Qwen3VLForConditionalGeneration, IsHybrid)
cls
,
cls
,
vllm_config
:
"VllmConfig"
,
vllm_config
:
"VllmConfig"
,
)
->
tuple
[
torch
.
dtype
,
torch
.
dtype
]:
)
->
tuple
[
torch
.
dtype
,
torch
.
dtype
]:
mamba_ssm_dtype
=
vllm_config
.
model_config
.
hf_text_config
.
mamba_ssm_dtype
return
MambaStateDtypeCalculator
.
gated_delta_net_state_dtype
(
return
MambaStateDtypeCalculator
.
gated_delta_net_state_dtype
(
vllm_config
.
model_config
.
dtype
,
mamba_ssm_dtype
vllm_config
.
model_config
.
dtype
,
vllm_config
.
cache_config
.
mamba_cache_dtype
,
vllm_config
.
cache_config
.
mamba_ssm_cache_dtype
,
)
)
@
classmethod
@
classmethod
...
...
vllm/model_executor/models/qwen3_next.py
View file @
eea3024f
...
@@ -341,7 +341,9 @@ class Qwen3NextGatedDeltaNet(nn.Module, MambaBase):
...
@@ -341,7 +341,9 @@ class Qwen3NextGatedDeltaNet(nn.Module, MambaBase):
def
get_state_dtype
(
self
)
->
tuple
[
torch
.
dtype
,
torch
.
dtype
]:
def
get_state_dtype
(
self
)
->
tuple
[
torch
.
dtype
,
torch
.
dtype
]:
return
MambaStateDtypeCalculator
.
gated_delta_net_state_dtype
(
return
MambaStateDtypeCalculator
.
gated_delta_net_state_dtype
(
self
.
model_config
.
dtype
,
self
.
cache_config
.
mamba_cache_dtype
self
.
model_config
.
dtype
,
self
.
cache_config
.
mamba_cache_dtype
,
self
.
cache_config
.
mamba_ssm_cache_dtype
,
)
)
def
get_state_shape
(
self
)
->
tuple
[
tuple
[
int
,
...],
tuple
[
int
,
...]]:
def
get_state_shape
(
self
)
->
tuple
[
tuple
[
int
,
...],
tuple
[
int
,
...]]:
...
@@ -1372,7 +1374,9 @@ class Qwen3NextForCausalLM(
...
@@ -1372,7 +1374,9 @@ class Qwen3NextForCausalLM(
vllm_config
:
"VllmConfig"
,
vllm_config
:
"VllmConfig"
,
)
->
tuple
[
torch
.
dtype
,
torch
.
dtype
]:
)
->
tuple
[
torch
.
dtype
,
torch
.
dtype
]:
return
MambaStateDtypeCalculator
.
gated_delta_net_state_dtype
(
return
MambaStateDtypeCalculator
.
gated_delta_net_state_dtype
(
vllm_config
.
model_config
.
dtype
,
vllm_config
.
cache_config
.
mamba_cache_dtype
vllm_config
.
model_config
.
dtype
,
vllm_config
.
cache_config
.
mamba_cache_dtype
,
vllm_config
.
cache_config
.
mamba_ssm_cache_dtype
,
)
)
@
classmethod
@
classmethod
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment