[Bugfix] Fix LoRA test by passing padded size back to the layer (#37811)

c058ff44 · Yongye Zhu · GitHub · ce9b1d76 · c058ff44
Unverified Commit c058ff44 authored Mar 22, 2026 by Yongye Zhu Committed by GitHub Mar 22, 2026
Show whitespace changes
Inline Side-by-side

Showing with 9 additions and 0 deletions

vllm/model_executor/layers/quantization/mxfp4.py vllm/model_executor/layers/quantization/mxfp4.py +9 -0

No files found.
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -154,6 +154,15 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
        )
        self.hidden_size = hidden_size = self.moe.hidden_dim
+        # Expose padded dimensions on the layer for LoRA and Marlin code
+        # that reads layer.hidden_size / layer.intermediate_size_per_partition.
+        layer.params_dtype = params_dtype
+        layer.num_experts = num_experts
+        layer.hidden_size = hidden_size
+        layer.intermediate_size_per_partition = (
+            intermediate_size_per_partition_after_pad
+        )
        # Fused gate_up_proj (column parallel)
        w13_weight = torch.nn.Parameter(
            torch.zeros(