Commit 048f7316 authored by zhuwenwen
Browse files

Add use_int4_w4a8 parameter to fused_experts

parent 11b94900
...@@ -1642,6 +1642,7 @@ def fused_experts( ...@@ -1642,6 +1642,7 @@ def fused_experts(
quant_config: Optional[FusedMoEQuantConfig] = None, quant_config: Optional[FusedMoEQuantConfig] = None,
allow_deep_gemm: bool = False, allow_deep_gemm: bool = False,
allow_cutlass_block_scaled_grouped_gemm: bool = False, allow_cutlass_block_scaled_grouped_gemm: bool = False,
use_int4_w4a8: bool = False,
use_nn_moe: Optional[bool] = False, use_nn_moe: Optional[bool] = False,
) -> torch.Tensor: ) -> torch.Tensor:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment