Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a02a1c83
Commit
a02a1c83
authored
Sep 11, 2025
by
zhuwenwen
Browse files
remove redundant routed_scaling_factor
parent
a053add9
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
1 addition
and
7 deletions
+1
-7
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
...quantization/compressed_tensors/compressed_tensors_moe.py
+0
-2
vllm/model_executor/layers/quantization/moe_wna16.py
vllm/model_executor/layers/quantization/moe_wna16.py
+0
-2
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+1
-2
vllm/model_executor/models/deepseek_v3.py
vllm/model_executor/models/deepseek_v3.py
+0
-1
No files found.
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
View file @
a02a1c83
...
...
@@ -838,7 +838,6 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
activation
:
str
=
"silu"
,
enable_eplb
:
bool
=
False
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
routed_scaling_factor
:
Optional
[
float
]
=
None
,
use_fused_gate
:
Optional
[
bool
]
=
False
,
expert_load_view
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
...
...
@@ -1112,7 +1111,6 @@ class CompressedTensorsW8A8Int8MoEMethod(CompressedTensorsMoEMethod):
activation
:
str
=
"silu"
,
enable_eplb
:
bool
=
False
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
routed_scaling_factor
:
Optional
[
float
]
=
None
,
use_fused_gate
:
Optional
[
bool
]
=
False
,
expert_load_view
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
...
...
vllm/model_executor/layers/quantization/moe_wna16.py
View file @
a02a1c83
...
...
@@ -348,7 +348,6 @@ class MoeWNA16Method(FusedMoEMethodBase):
logical_to_physical_map
:
Optional
[
torch
.
Tensor
]
=
None
,
logical_replica_count
:
Optional
[
torch
.
Tensor
]
=
None
,
use_nn_moe
:
Optional
[
bool
]
=
False
,
routed_scaling_factor
:
Optional
[
float
]
=
None
,
use_fused_gate
:
Optional
[
bool
]
=
False
,
)
->
torch
.
Tensor
:
assert
self
.
fused_experts
is
None
...
...
@@ -370,7 +369,6 @@ class MoeWNA16Method(FusedMoEMethodBase):
routed_scaling_factor
=
routed_scaling_factor
,
e_score_correction_bias
=
e_score_correction_bias
,
indices_type
=
self
.
topk_indices_dtype
,
routed_scaling_factor
=
routed_scaling_factor
,
use_fused_gate
=
use_fused_gate
)
weight_bits
=
self
.
quant_config
.
weight_bits
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
a02a1c83
...
...
@@ -170,8 +170,7 @@ class DeepseekV2MoE(nn.Module):
routed_scaling_factor
=
self
.
routed_scaling_factor
,
e_score_correction_bias
=
self
.
gate
.
e_score_correction_bias
,
enable_eplb
=
self
.
enable_eplb
,
num_redundant_experts
=
self
.
n_redundant_experts
,
routed_scaling_factor
=
self
.
routed_scaling_factor
)
num_redundant_experts
=
self
.
n_redundant_experts
)
if
config
.
n_shared_experts
is
not
None
:
intermediate_size
=
(
config
.
moe_intermediate_size
*
...
...
vllm/model_executor/models/deepseek_v3.py
View file @
a02a1c83
...
...
@@ -104,7 +104,6 @@ class DeepseekV3MoE(nn.Module):
self
.
tp_size
=
get_tensor_model_parallel_world_size
()
self
.
routed_scaling_factor
=
config
.
routed_scaling_factor
self
.
n_shared_experts
=
config
.
n_shared_experts
self
.
routed_scaling_factor
=
config
.
routed_scaling_factor
if
self
.
tp_size
>
config
.
n_routed_experts
:
raise
ValueError
(
f
"Tensor parallel size
{
self
.
tp_size
}
is greater than "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment