Commit 269c4db0 (unverified), authored by Varun Sundar Rabindranath, committed by GitHub
Browse files

[Misc][DP] Guard mxfp4 implementation selection (#27484)


Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
parent 52efc34e
......@@ -794,7 +794,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
)
else:
raise NotImplementedError(
"Incompatible Mxfp4 backend for EP batched experts format"
f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for "
"EP batched experts format"
)
else:
assert self.moe_quant_config is not None
......@@ -813,8 +814,12 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs)
elif self.mxfp4_backend == Mxfp4Backend.MARLIN:
return MarlinExperts(self.moe_quant_config)
else:
elif self.mxfp4_backend == Mxfp4Backend.TRITON:
return OAITritonExperts(self.moe_quant_config)
else:
raise NotImplementedError(
f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for EP"
)
def _route_and_experts(
self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment