Commit 3e34e900 (unverified), authored by narutolhy, committed by GitHub
Browse files

Fix: sync prepare_fp8_layer_for_marlin with latest vllm changes (#7648)

parent 7349717e
......
@@ -76,7 +76,7 @@ class CompressedTensorsW8A16Fp8(CompressedTensorsScheme):
layer.input_scale = torch.nn.Parameter(
layer.input_scale.data, requires_grad=False
)
-prepare_fp8_layer_for_marlin(layer, strategy="channel")
+prepare_fp8_layer_for_marlin(layer, size_k_first=True)
def create_weights(
self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment