Commit 1072b724 authored by zhuwenwen
Browse files

Fix MoE params and runtime error

parent 2461ea9d
...@@ -1655,6 +1655,7 @@ def fused_experts( ...@@ -1655,6 +1655,7 @@ def fused_experts(
quant_config: FusedMoEQuantConfig | None = None, quant_config: FusedMoEQuantConfig | None = None,
allow_deep_gemm: bool = False, allow_deep_gemm: bool = False,
allow_cutlass_block_scaled_grouped_gemm: bool = False, allow_cutlass_block_scaled_grouped_gemm: bool = False,
use_nn_moe: bool | None = False,
) -> torch.Tensor: ) -> torch.Tensor:
if quant_config is None: if quant_config is None:
quant_config = FUSED_MOE_UNQUANTIZED_CONFIG quant_config = FUSED_MOE_UNQUANTIZED_CONFIG
...@@ -1732,6 +1733,7 @@ def fused_experts( ...@@ -1732,6 +1733,7 @@ def fused_experts(
block_shape=quant_config.block_shape, block_shape=quant_config.block_shape,
w1_bias=quant_config.w1_bias, w1_bias=quant_config.w1_bias,
w2_bias=quant_config.w2_bias, w2_bias=quant_config.w2_bias,
use_nn_moe=use_nn_moe,
) )
......
...@@ -1985,6 +1985,7 @@ class FusedMoE(CustomOp): ...@@ -1985,6 +1985,7 @@ class FusedMoE(CustomOp):
if do_naive_dispatch_combine if do_naive_dispatch_combine
else hidden_states, else hidden_states,
router_logits=router_logits, router_logits=router_logits,
use_nn_moe=self.use_nn_moe,
) )
if has_separate_shared_experts: if has_separate_shared_experts:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment