[Bugfix] Support other quantization methods in glm41v (#36321)

Signed-off-by: g00887675/loganJane <g00887675/loganJane73@hotmail.com>
Co-authored-by: g00887675/loganJane <g00887675/loganJane73@hotmail.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>

[Bugfix] Support other quantization methods in glm41v (#36321)
Signed-off-by: g00887675/loganJane <g00887675/loganJane73@hotmail.com>
Co-authored-by: g00887675/loganJane <g00887675/loganJane73@hotmail.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
545d18d8 · LoganJane · GitHub · e661b9ee · 545d18d8
Unverified Commit 545d18d8 authored Mar 11, 2026 by LoganJane Committed by GitHub Mar 11, 2026
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 1 deletion

vllm/model_executor/models/glm4_1v.py vllm/model_executor/models/glm4_1v.py +6 -1

No files found.
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -63,6 +63,9 @@ from vllm.model_executor.layers.linear import (
    RowParallelLinear,
 )
 from vllm.model_executor.layers.quantization import QuantizationConfig
+from vllm.model_executor.layers.quantization.compressed_tensors import (
+    compressed_tensors,
+)
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.rotary_embedding.common import (
    ApplyRotaryEmb,
@@ -280,7 +283,9 @@ class Glm4vVisionAttention(nn.Module):
            bias=False,
            quant_config=quant_config,
            # Change qkv prefix to align with GLM-4.5V-FP8 quantization cfg
-            prefix=f"{prefix}.qkv_proj" if quant_config else f"{prefix}.qkv",
+            prefix=f"{prefix}.qkv_proj"
+            if isinstance(quant_config, compressed_tensors.CompressedTensorsConfig)
+            else f"{prefix}.qkv",
            disable_tp=use_data_parallel,
        )
        self.proj = RowParallelLinear(