Fix missing bias in blaslt_scaled_mm

09c2856a · zhuwenwen · 9be76efd · 09c2856a · 09c2856a
Commit 09c2856a authored Nov 26, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 3 additions and 1 deletion

vllm/_custom_ops.py vllm/_custom_ops.py +2 -0

vllm/model_executor/layers/quantization/utils/w8a8_utils.py vllm/model_executor/layers/quantization/utils/w8a8_utils.py +1 -1

No files found.
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -1091,6 +1091,8 @@ def blaslt_scaled_mm(a: torch.Tensor,
    n = b.shape[0]
    k = a.shape[1]
    _, out = quant_ops.hipblaslt_w8a8_gemm(a, b, scale_a, scale_b, m, n, k, 'NT', out_dtype)
+    if bias is not None:
+        out += bias
    return out
 def triton_scaled_mm(a: torch.Tensor,

--- a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py
@@ -555,7 +555,7 @@ def apply_int8_linear(
                                    scale_a=x_scale,
                                    scale_b=weight_scale,
                                    out_dtype=input.dtype,
-                                    bias=None)
+                                    bias=bias)
    else:
        return ops.rocblas_scaled_mm(
                x_q,