[Quark] Fix MoE fp8 activation scale handling on mi300 (#34386)

Signed-off-by: Bowen Bao <bowenbao@amd.com>

[Quark] Fix MoE fp8 activation scale handling on mi300 (#34386)
Signed-off-by: Bowen Bao <bowenbao@amd.com>
d9e62c03 · Bowen Bao · GitHub · a1a2d794 · d9e62c03
Unverified commit d9e62c03, authored Feb 19, 2026 by Bowen Bao; committed by GitHub on Feb 19, 2026
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 3 deletions

vllm/model_executor/layers/quantization/quark/quark_moe.py (+3 -3)

No files found.
--- a/vllm/model_executor/layers/quantization/quark/quark_moe.py
+++ b/vllm/model_executor/layers/quantization/quark/quark_moe.py
@@ -858,7 +858,7 @@ class QuarkOCP_MX_MoEMethod(QuarkMoEMethod):
            layer.w2_input_scale = None

    def process_weights_after_loading(self, layer):
-        if self.static_input_scales:
+        if self.static_input_scales and self.input_dtype == "fp8":
            # firstly, process activations if fp8 static input
            if layer.w13_input_scale is None or layer.w2_input_scale is None:
                raise ValueError(
@@ -883,14 +883,14 @@ class QuarkOCP_MX_MoEMethod(QuarkMoEMethod):
            if current_platform.is_fp8_fnuz():
                # Normalize the weights and scales
                _, _, w13_input_scale = normalize_e4m3fn_to_e4m3fnuz(
-                    torch.empty_like(layer.w13_weight, dtype=torch.float8_e4m3fnuz),
+                    torch.empty_like(layer.w13_weight, dtype=torch.float8_e4m3fn),
                    torch.empty_like(
                        layer.w13_weight_scale, dtype=layer.w13_weight_scale.dtype
                    ),
                    layer.w13_input_scale,
                )
                _, _, w2_input_scale = normalize_e4m3fn_to_e4m3fnuz(
-                    torch.empty_like(layer.w2_weight, dtype=torch.float8_e4m3fnuz),
+                    torch.empty_like(layer.w2_weight, dtype=torch.float8_e4m3fn),
                    torch.empty_like(
                        layer.w2_weight_scale, dtype=layer.w13_weight_scale.dtype
                    ),