[BugFix] Fix FusedMoELoRA + ModularKernel Integration (#28237)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>

[BugFix] Fix FusedMoELoRA + ModularKernel Integration (#28237)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
ca6f755d · Varun Sundar Rabindranath · GitHub · ca90f503 · ca6f755d
Unverified commit ca6f755d, authored Nov 06, 2025 by Varun Sundar Rabindranath; committed by GitHub on Nov 06, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 3 deletions

vllm/lora/layers/fused_moe.py vllm/lora/layers/fused_moe.py +3 -3

No files found.
--- a/vllm/lora/layers/fused_moe.py
+++ b/vllm/lora/layers/fused_moe.py
@@ -25,6 +25,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
    modular_triton_fused_moe,
    try_get_optimal_moe_config,
 )
+from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod
 class FusedMoEWithLoRA(BaseLayerWithLoRA):
@@ -280,10 +281,9 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA):
            self.base_layer, fused_experts.moe_sum
        )
-        self.base_layer.quant_method.old_fused_experts = (
+        self.base_layer.quant_method = FusedMoEModularMethod(
-            self.base_layer.quant_method.fused_experts
+            self.base_layer.quant_method, m_fused_moe_fn
        )
-        self.base_layer.quant_method.fused_experts = m_fused_moe_fn
    def create_lora_weights(
        self,