Unverified Commit 9a719b7a authored by Kaixi Hou, committed by GitHub

[NVIDIA] Remove unused `get_fused_moe_impl_class` function (#9764)

parent 3fa62da7
@@ -1074,16 +1074,3 @@ class FlashInferFP4MoE(FusedMoE):
         )[0]
         return result
 
-
-
-def get_fused_moe_impl_class():
-    """Factory function to get the appropriate FusedMoE implementation class."""
-    if should_use_flashinfer_trtllm_moe() and _is_fp4_quantization_enabled():
-        # Use FP4 variant when FP4 quantization is enabled
-        return FlashInferFP4MoE
-    elif should_use_flashinfer_trtllm_moe():
-        # Use regular FlashInfer variant for non-FP4 FlashInfer cases
-        return FlashInferFusedMoE
-    else:
-        # Default case
-        return FusedMoE
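
The removed factory simply mapped two runtime predicates to an implementation class. For readers tracing what it did, here is a minimal sketch of how that choice could be inlined at a hypothetical call site; the branch logic mirrors the deleted function, but the variable name `moe_cls` and the import availability of the two predicates are assumptions, not code from this repository:

```python
# Illustrative sketch only, not repository code: the same selection logic as
# the deleted get_fused_moe_impl_class(), inlined at a hypothetical call site.
# Assumes should_use_flashinfer_trtllm_moe() and _is_fp4_quantization_enabled()
# remain importable, as they were in the removed function body.
if should_use_flashinfer_trtllm_moe():
    # FP4 quantization selects the FP4-specialized FlashInfer variant.
    moe_cls = FlashInferFP4MoE if _is_fp4_quantization_enabled() else FlashInferFusedMoE
else:
    # Default case: the generic fused MoE implementation.
    moe_cls = FusedMoE
```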
@@ -635,11 +635,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
             layer.register_parameter("w13_weight_scale_inv", w13_weight_scale)
             layer.register_parameter("w2_weight_scale_inv", w2_weight_scale)
             assert self.quant_config.activation_scheme == "dynamic"
-            if (
-                get_bool_env_var("SGLANG_CUTLASS_MOE")
-                and self.cutlass_fp8_supported
-                and (is_sm100_supported() or is_sm90_supported())
-            ):
+            if self.use_cutlass_fused_experts_fp8:
                 self.ab_strides1 = torch.full(
                     (num_experts,),
                     hidden_size,
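
The new condition reads a single precomputed attribute instead of repeating the environment and hardware checks inline. A plausible sketch of how such a flag could be derived once during setup; the derivation below reuses the exact condition this hunk deletes, but its placement (e.g., in `__init__`) is an assumption, not confirmed by the diff:

```python
# Sketch under assumptions: compute the flag once (e.g., in __init__) from the
# same checks the inline condition used to perform at this call site.
self.use_cutlass_fused_experts_fp8 = (
    get_bool_env_var("SGLANG_CUTLASS_MOE")  # opt-in via environment variable
    and self.cutlass_fp8_supported  # CUTLASS FP8 kernels are available
    and (is_sm100_supported() or is_sm90_supported())  # SM90/SM100 GPUs only
)
```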