Commit 267cc5ff authored by zhuwenwen
Browse files

update List

parent b1babea8
@@ -1047,7 +1047,7 @@ def try_get_optimal_moe_config(
 dtype: Optional[str],
 M: int,
 is_marlin: bool = False,
-block_shape: Optional[list[int]] = None,
+block_shape: Optional[List[int]] = None,
 use_nn_moe: Optional[bool] = False,
 ) -> dict[str, int]:
 from vllm.model_executor.layers.fused_moe import get_config
@@ -1392,7 +1392,7 @@ def fused_experts(
 w2_zp: Optional[torch.Tensor] = None,
 a1_scale: Optional[torch.Tensor] = None,
 a2_scale: Optional[torch.Tensor] = None,
-block_shape: Optional[list[int]] = None,
+block_shape: Optional[List[int]] = None,
 allow_deep_gemm: bool = False,
 allow_cutlass_block_scaled_grouped_gemm: bool = False,
 use_nn_moe: Optional[bool] = False) -> torch.Tensor:
@@ -1796,7 +1796,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
 use_int8_w8a16: bool = False,
 use_int4_w4a16: bool = False,
 per_act_token_quant: bool = False,
-block_shape: Optional[list[int]] = None,
+block_shape: Optional[List[int]] = None,
 ):
 super().__init__(
 FusedMoEQuantConfig.make(
@@ -1981,7 +1981,7 @@ def modular_triton_fused_moe(
 use_int8_w8a16: bool,
 use_int4_w4a16: bool,
 per_act_token_quant: bool,
-block_shape: Optional[list[int]] = None,
+block_shape: Optional[List[int]] = None,
 ) -> mk.FusedMoEModularKernel:
 return mk.FusedMoEModularKernel(
 MoEPrepareAndFinalizeNoEP(),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment