Fix: sync prepare_fp8_layer_for_marlin with latest vllm changes (#7648)

3e34e900 · narutolhy · GitHub · 7349717e · 3e34e900
Unverified commit 3e34e900, authored Jun 30, 2025 by narutolhy; committed by GitHub on Jun 30, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py +1 -1

No files found.
--- a/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
@@ -76,7 +76,7 @@ class CompressedTensorsW8A16Fp8(CompressedTensorsScheme):
            layer.input_scale = torch.nn.Parameter(
                layer.input_scale.data, requires_grad=False
            )
-        prepare_fp8_layer_for_marlin(layer, strategy="channel")
+        prepare_fp8_layer_for_marlin(layer, size_k_first=True)

    def create_weights(
        self,