Unverified commit 545d18d8, authored by LoganJane and committed by GitHub
Browse files

[Bugfix] Support other quantization methods in glm41v (#36321)


Signed-off-by: g00887675/loganJane <g00887675/loganJane73@hotmail.com>
Co-authored-by: g00887675/loganJane <g00887675/loganJane73@hotmail.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent e661b9ee
...@@ -63,6 +63,9 @@ from vllm.model_executor.layers.linear import ( ...@@ -63,6 +63,9 @@ from vllm.model_executor.layers.linear import (
RowParallelLinear, RowParallelLinear,
) )
from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.layers.quantization.compressed_tensors import (
compressed_tensors,
)
from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.model_executor.layers.rotary_embedding.common import ( from vllm.model_executor.layers.rotary_embedding.common import (
ApplyRotaryEmb, ApplyRotaryEmb,
...@@ -280,7 +283,9 @@ class Glm4vVisionAttention(nn.Module): ...@@ -280,7 +283,9 @@ class Glm4vVisionAttention(nn.Module):
bias=False, bias=False,
quant_config=quant_config, quant_config=quant_config,
# Change qkv prefix to align with GLM-4.5V-FP8 quantization cfg # Change qkv prefix to align with GLM-4.5V-FP8 quantization cfg
prefix=f"{prefix}.qkv_proj" if quant_config else f"{prefix}.qkv", prefix=f"{prefix}.qkv_proj"
if isinstance(quant_config, compressed_tensors.CompressedTensorsConfig)
else f"{prefix}.qkv",
disable_tp=use_data_parallel, disable_tp=use_data_parallel,
) )
self.proj = RowParallelLinear( self.proj = RowParallelLinear(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment