Unverified commit ca6f755d, authored by Varun Sundar Rabindranath, committed by GitHub
Browse files

[BugFix] Fix FusedMoELoRA + ModularKernel Integration (#28237)


Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
parent ca90f503
...@@ -25,6 +25,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import ( ...@@ -25,6 +25,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
modular_triton_fused_moe, modular_triton_fused_moe,
try_get_optimal_moe_config, try_get_optimal_moe_config,
) )
from vllm.model_executor.layers.fused_moe.layer import FusedMoEModularMethod
class FusedMoEWithLoRA(BaseLayerWithLoRA): class FusedMoEWithLoRA(BaseLayerWithLoRA):
...@@ -280,10 +281,9 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA): ...@@ -280,10 +281,9 @@ class FusedMoEWithLoRA(BaseLayerWithLoRA):
self.base_layer, fused_experts.moe_sum self.base_layer, fused_experts.moe_sum
) )
self.base_layer.quant_method.old_fused_experts = ( self.base_layer.quant_method = FusedMoEModularMethod(
self.base_layer.quant_method.fused_experts self.base_layer.quant_method, m_fused_moe_fn
) )
self.base_layer.quant_method.fused_experts = m_fused_moe_fn
def create_lora_weights( def create_lora_weights(
self, self,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment