Fix call to moe_mk in modelopt MoE modules (required for LoRA) (#34575)

Signed-off-by: Daniel Serebrenik <daserebrenik@nvidia.com>

Fix call to moe_mk in modelopt MoE modules (required for LoRA) (#34575)
Signed-off-by: Daniel Serebrenik <daserebrenik@nvidia.com>
ec7d9e67 · danisereb · GitHub · 3bb4e431 · ec7d9e67
Unverified commit ec7d9e67 — authored Feb 16, 2026 by danisereb; committed by GitHub on Feb 16, 2026
Show whitespace changes
Inline Side-by-side

Showing with 10 additions and 10 deletions

vllm/model_executor/layers/quantization/modelopt.py (+10 -10)

No files found.
--- a/vllm/model_executor/layers/quantization/modelopt.py
+++ b/vllm/model_executor/layers/quantization/modelopt.py
@@ -977,11 +977,11 @@ class ModelOptFp8MoEMethod(FusedMoEMethodBase):

        assert self.moe_mk is not None
        return self.moe_mk(
-            x,
-            layer.w13_weight,
-            layer.w2_weight,
-            topk_weights,
-            topk_ids,
+            hidden_states=x,
+            w1=layer.w13_weight,
+            w2=layer.w2_weight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
            activation=layer.activation,
            global_num_experts=layer.global_num_experts,
            expert_map=layer.expert_map,
@@ -1549,11 +1549,11 @@ class ModelOptNvFp4FusedMoE(FusedMoEMethodBase):
        else:
            assert self.moe_mk is not None
            return self.moe_mk(
-                x,
-                layer.w13_weight,
-                layer.w2_weight,
-                topk_weights,
-                topk_ids,
+                hidden_states=x,
+                w1=layer.w13_weight,
+                w2=layer.w2_weight,
+                topk_weights=topk_weights,
+                topk_ids=topk_ids,
                activation=layer.activation,
                global_num_experts=layer.global_num_experts,
                expert_map=layer.expert_map,