Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b31c7251
Commit
b31c7251
authored
Feb 03, 2026
by
zhuwenwen
Browse files
fix run error
parent
bdd33b3f
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
4 deletions
+10
-4
vllm/config/model.py
vllm/config/model.py
+1
-0
vllm/config/speculative.py
vllm/config/speculative.py
+1
-1
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+8
-3
No files found.
vllm/config/model.py
View file @
b31c7251
...
@@ -912,6 +912,7 @@ class ModelConfig:
...
@@ -912,6 +912,7 @@ class ModelConfig:
# imports during override detection (e.g., MXFP4 imports Triton)
# imports during override detection (e.g., MXFP4 imports Triton)
"mxfp4"
,
"mxfp4"
,
"cpu_awq"
,
"cpu_awq"
,
"slimquant_marlin"
,
"slimquant_w4a8_marlin"
,
"slimquant_w4a8_marlin"
,
"slimquant_compressed_tensors_marlin"
,
"slimquant_compressed_tensors_marlin"
,
]
]
...
...
vllm/config/speculative.py
View file @
b31c7251
...
@@ -371,7 +371,7 @@ class SpeculativeConfig:
...
@@ -371,7 +371,7 @@ class SpeculativeConfig:
tokenizer_revision
=
self
.
target_model_config
.
tokenizer_revision
,
tokenizer_revision
=
self
.
target_model_config
.
tokenizer_revision
,
spec_target_max_model_len
=
self
.
target_model_config
.
max_model_len
,
spec_target_max_model_len
=
self
.
target_model_config
.
max_model_len
,
quantization
=
self
.
quantization
,
quantization
=
self
.
quantization
,
enforce_eager
=
True
if
envs
.
VLLM_SPEC_DECODE_EAGER
else
self
.
target_model_config
.
enforce_eager
,
enforce_eager
=
self
.
target_model_config
.
enforce_eager
,
max_logprobs
=
self
.
target_model_config
.
max_logprobs
,
max_logprobs
=
self
.
target_model_config
.
max_logprobs
,
hf_overrides
=
SpeculativeConfig
.
hf_config_override
,
hf_overrides
=
SpeculativeConfig
.
hf_config_override
,
config_format
=
self
.
target_model_config
.
config_format
,
config_format
=
self
.
target_model_config
.
config_format
,
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
b31c7251
...
@@ -263,6 +263,11 @@ class DeepseekV2MoE(nn.Module):
...
@@ -263,6 +263,11 @@ class DeepseekV2MoE(nn.Module):
prefix
=
f
"
{
prefix
}
.gate"
,
prefix
=
f
"
{
prefix
}
.gate"
,
)
)
if
getattr
(
config
,
"topk_method"
,
None
)
==
"noaux_tc"
:
if
getattr
(
config
,
"topk_method"
,
None
)
==
"noaux_tc"
:
if
envs
.
VLLM_ENABLE_MOE_FUSED_GATE
:
# avoid moe_fused_gate precision error
self
.
gate
.
e_score_correction_bias
=
nn
.
Parameter
(
torch
.
empty
(
config
.
n_routed_experts
))
else
:
self
.
gate
.
e_score_correction_bias
=
nn
.
Parameter
(
self
.
gate
.
e_score_correction_bias
=
nn
.
Parameter
(
torch
.
empty
(
config
.
n_routed_experts
,
dtype
=
torch
.
float32
)
torch
.
empty
(
config
.
n_routed_experts
,
dtype
=
torch
.
float32
)
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment