Commit e675f1f4 authored by zhuwenwen
Browse files

Merge branch 'v0.15.1-dev-fth' into 'v0.15.1-dev'

修复 channel-wise 使用 triton 时报错的 bug

See merge request dcutoolkit/deeplearing/vllm!443
parents d146a231 9d44744c
...@@ -161,9 +161,8 @@ class CompressedTensorsW8A8Fp8(CompressedTensorsScheme): ...@@ -161,9 +161,8 @@ class CompressedTensorsW8A8Fp8(CompressedTensorsScheme):
) )
if envs.VLLM_W8A8_BACKEND == 3: if envs.VLLM_W8A8_BACKEND == 3:
weight = weight.t().contiguous() weight = weight.t().contiguous()
# triton不用转置,torch需要 else:
# else: weight = weight.t()
# weight = weight.t()
elif self.strategy == QuantizationStrategy.BLOCK: elif self.strategy == QuantizationStrategy.BLOCK:
assert self.is_static_input_scheme is False assert self.is_static_input_scheme is False
weight, weight_scale = process_fp8_weight_block_strategy( weight, weight_scale = process_fp8_weight_block_strategy(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment