Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
97000a2b
Unverified
Commit
97000a2b
authored Dec 18, 2025 by Wentao Ye
Committed by GitHub Dec 18, 2025
Browse files
[Bug] Fix compressed tensor not using deepgemm (#30820)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent
d2dc5dfc
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 10 additions and 1 deletion
+10
-1
vllm/model_executor/layers/fused_moe/fused_moe.py
vllm/model_executor/layers/fused_moe/fused_moe.py
+0
-1
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
...quantization/compressed_tensors/compressed_tensors_moe.py
+10
-0
No files found.
vllm/model_executor/layers/fused_moe/fused_moe.py
View file @
97000a2b
...
...
@@ -1696,7 +1696,6 @@ def fused_experts(
and
(
is_deep_gemm_e8m0_used
()
or
_valid_deep_gemm
(
hidden_states
,
w1
,
w2
))
):
assert
quant_config
is
not
None
assert
apply_router_weight_on_input
is
False
return
deep_gemm_moe_fp8
(
hidden_states
=
hidden_states
,
w1
=
w1
,
...
...
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
View file @
97000a2b
...
...
@@ -96,6 +96,7 @@ from vllm.utils.deep_gemm import (
get_col_major_tma_aligned_tensor
,
get_mk_alignment_for_contiguous_layout
,
is_deep_gemm_e8m0_used
,
is_deep_gemm_supported
,
)
from
vllm.utils.import_utils
import
has_deep_gemm
...
...
@@ -716,6 +717,13 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
get_marlin_input_dtype
(
layer_name
)
if
self
.
use_marlin
else
None
)
self
.
allow_deep_gemm
=
(
self
.
block_quant
and
envs
.
VLLM_MOE_USE_DEEP_GEMM
and
is_deep_gemm_supported
()
and
list
(
self
.
weight_block_size
)
==
get_mk_alignment_for_contiguous_layout
()
)
def
create_weights
(
self
,
layer
:
torch
.
nn
.
Module
,
...
...
@@ -1231,6 +1239,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
if
self
.
disable_expert_map
else
layer
.
expert_map
,
# ???
quant_config
=
self
.
moe_quant_config
,
allow_deep_gemm
=
self
.
allow_deep_gemm
,
)
else
:
from
vllm.model_executor.layers.fused_moe.cutlass_moe
import
(
...
...
@@ -1272,6 +1281,7 @@ class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod):
global_num_experts
=
layer
.
global_num_experts
,
expert_map
=
layer
.
expert_map
,
quant_config
=
self
.
moe_quant_config
,
allow_deep_gemm
=
self
.
allow_deep_gemm
,
)
@
property
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment