Commit 267cc5ff authored by zhuwenwen
Browse files

update List

parent b1babea8
......@@ -1047,7 +1047,7 @@ def try_get_optimal_moe_config(
dtype: Optional[str],
M: int,
is_marlin: bool = False,
- block_shape: Optional[list[int]] = None,
+ block_shape: Optional[List[int]] = None,
use_nn_moe: Optional[bool] = False,
) -> dict[str, int]:
from vllm.model_executor.layers.fused_moe import get_config
......@@ -1392,7 +1392,7 @@ def fused_experts(
w2_zp: Optional[torch.Tensor] = None,
a1_scale: Optional[torch.Tensor] = None,
a2_scale: Optional[torch.Tensor] = None,
- block_shape: Optional[list[int]] = None,
+ block_shape: Optional[List[int]] = None,
allow_deep_gemm: bool = False,
allow_cutlass_block_scaled_grouped_gemm: bool = False,
use_nn_moe: Optional[bool] = False) -> torch.Tensor:
......@@ -1796,7 +1796,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
use_int8_w8a16: bool = False,
use_int4_w4a16: bool = False,
per_act_token_quant: bool = False,
- block_shape: Optional[list[int]] = None,
+ block_shape: Optional[List[int]] = None,
):
super().__init__(
FusedMoEQuantConfig.make(
......@@ -1981,7 +1981,7 @@ def modular_triton_fused_moe(
use_int8_w8a16: bool,
use_int4_w4a16: bool,
per_act_token_quant: bool,
- block_shape: Optional[list[int]] = None,
+ block_shape: Optional[List[int]] = None,
) -> mk.FusedMoEModularKernel:
return mk.FusedMoEModularKernel(
MoEPrepareAndFinalizeNoEP(),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment