Unverified commit 5a435507, authored by Seungho Yoon and committed by GitHub
Browse files

fix(mxfp4): return is_monolithic=False when LoRA is enabled for Triton backend (#35382)


Signed-off-by: Seungho Yoon <yoonsnowdev@gmail.com>
parent 59d7af9c
...@@ -1001,6 +1001,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): ...@@ -1001,6 +1001,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
@property @property
def is_monolithic(self) -> bool: def is_monolithic(self) -> bool:
if self.moe.is_lora_enabled:
return False
return ( return (
self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM
or self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_BF16 or self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_BF16
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment