[ Bugfix ] Fix AutoFP8 fp8 marlin (#6609)

082ecd80 · Robert Shaw · GitHub · f952bbc8 · 082ecd80
Unverified commit 082ecd80, authored Jul 20, 2024 by Robert Shaw; committed by GitHub on Jul 20, 2024.
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 1 deletion

vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py +2 -1

No files found.
--- a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
+++ b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
@@ -76,7 +76,8 @@ def prepare_fp8_layer_for_marlin(layer: torch.nn.Module) -> None:
    # WEIGHT SCALES
    # Currently Marlin doesn't support per-tensor scales, so we
    # expand it to channelwise
-    is_channelwise = layer.weight_scale.shape[0] == part_size_n
+    is_channelwise = (len(layer.weight_scale.shape) > 0
+                      and layer.weight_scale.shape[0] == part_size_n)
    if is_channelwise:
        scales = layer.weight_scale
    else: