Unverified commit 156e3355, authored by amirkl94 and committed by GitHub
Browse files

Fix: Re-Enable EP for trtllm MoE FP8 backend (#36494)


Signed-off-by: Amir Klein <203507526+amirkl94@users.noreply.github.com>
parent d0cd736c
......@@ -35,12 +35,6 @@ class TrtLlmFp8Experts(mk.FusedMoEExpertsMonolithic):
):
super().__init__(moe_config, quant_config)
if moe_config.moe_parallel_config.use_ep and quant_config.is_per_tensor:
raise NotImplementedError(
"EP parallelism is not supported with TRTLLM"
"per-tensor FP8 quantization."
)
self.routing_method_type = moe_config.routing_method
self.topk = moe_config.experts_per_token
self.intermediate_size_per_partition = (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment