修复 channel-wise 策略下使用 Triton 时报错的 bug

9d44744c · SAC_fanth · d146a231 · 9d44744c
Commit 9d44744c authored Feb 24, 2026 by SAC_fanth
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 3 deletions

vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +2 -3

No files found.
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
@@ -161,9 +161,8 @@ class CompressedTensorsW8A8Fp8(CompressedTensorsScheme):
            )
            if envs.VLLM_W8A8_BACKEND == 3:
                weight = weight.t().contiguous()
-            # triton不用转置，torch需要
+            else:
-            # else:
+                weight = weight.t()
-            #     weight = weight.t()
        elif self.strategy == QuantizationStrategy.BLOCK:
            assert self.is_static_input_scheme is False
            weight, weight_scale = process_fp8_weight_block_strategy(