Commit 269c4db0 (unverified), authored by Varun Sundar Rabindranath, committed by GitHub
Browse files

[Misc][DP] Guard mxfp4 implementation selection (#27484)


Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
parent 52efc34e
...@@ -794,7 +794,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): ...@@ -794,7 +794,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
) )
else: else:
raise NotImplementedError( raise NotImplementedError(
"Incompatible Mxfp4 backend for EP batched experts format" f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for "
"EP batched experts format"
) )
else: else:
assert self.moe_quant_config is not None assert self.moe_quant_config is not None
...@@ -813,8 +814,12 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): ...@@ -813,8 +814,12 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs) return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs)
elif self.mxfp4_backend == Mxfp4Backend.MARLIN: elif self.mxfp4_backend == Mxfp4Backend.MARLIN:
return MarlinExperts(self.moe_quant_config) return MarlinExperts(self.moe_quant_config)
else: elif self.mxfp4_backend == Mxfp4Backend.TRITON:
return OAITritonExperts(self.moe_quant_config) return OAITritonExperts(self.moe_quant_config)
else:
raise NotImplementedError(
f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for EP"
)
def _route_and_experts( def _route_and_experts(
self, self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment