[CI][Bugfix][AMD] Ensure weights are created when emulating OCP MXFP4 (#36993)

Signed-off-by: Randall Smith <Randall.Smith@amd.com>

[CI][Bugfix][AMD] Ensure weights are created when emulating OCP MXFP4 (#36993)
Signed-off-by: Randall Smith <Randall.Smith@amd.com>
83d09d36 · rasmith · GitHub · 92b9afee · 83d09d36
Unverified Commit 83d09d36 authored Apr 07, 2026 by rasmith Committed by GitHub Apr 08, 2026
Hide whitespace changes
Inline Side-by-side

Showing with 13 additions and 7 deletions

vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py ...xecutor/layers/quantization/quark/schemes/quark_ocp_mx.py +13 -7

No files found.
--- a/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py
+++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_ocp_mx.py
@@ -267,20 +267,26 @@ class QuarkOCP_MX(QuarkScheme):
    def get_min_capability(cls) -> int:
        return 70

+    def process_dynamic_mxfp4_weights_after_loading(
+        self, layer: torch.nn.Module
+    ) -> None:
+        w_q, w_s = dynamic_mxfp4_quant(layer.weight)
+        layer.weight_scale = torch.nn.Parameter(w_s.T.contiguous(), requires_grad=False)
+        layer.weight = torch.nn.Parameter(w_q, requires_grad=False)
+
    def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
        layer.weight = torch.nn.Parameter(layer.weight.data, requires_grad=False)

        if self.emulate:
-            layer.weight_scale = torch.nn.Parameter(
-                layer.weight_scale.data, requires_grad=False
-            )
-        else:
            if self.dynamic_mxfp4_quant:
-                w_q, w_s = dynamic_mxfp4_quant(layer.weight)
+                self.process_dynamic_mxfp4_weights_after_loading(layer)
+            else:
                layer.weight_scale = torch.nn.Parameter(
-                    w_s.T.contiguous(), requires_grad=False
+                    layer.weight_scale.data, requires_grad=False
                )
-                layer.weight = torch.nn.Parameter(w_q, requires_grad=False)
+        else:
+            if self.dynamic_mxfp4_quant:
+                self.process_dynamic_mxfp4_weights_after_loading(layer)
            elif self.rocm_use_aiter_fp4_asm_gemm:
                # shuffle weight scale
                weight_scale_shuffle = layer.weight_scale.data