Unverified commit 2f2fcb31, authored by Yuxuan Zhang, committed by GitHub
Browse files

[Misc] Remove _maybe_ignore_quant_config from GLM4.1v (#20432)


Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
parent 1dba2c4e
...@@ -55,9 +55,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear, ...@@ -55,9 +55,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
QKVParallelLinear, QKVParallelLinear,
RowParallelLinear) RowParallelLinear)
from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.layers.quantization.gptq import GPTQConfig
from vllm.model_executor.layers.quantization.gptq_marlin import (
GPTQMarlinConfig)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.model_executor.models.module_mapping import MultiModelKeys
from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal import MULTIMODAL_REGISTRY
...@@ -179,6 +176,7 @@ class Glm4vVisionMLP(nn.Module): ...@@ -179,6 +176,7 @@ class Glm4vVisionMLP(nn.Module):
hidden_features: int, hidden_features: int,
bias: bool = False, bias: bool = False,
quant_config: Optional[QuantizationConfig] = None, quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
): ):
super().__init__() super().__init__()
self.gate_up_proj = MergedColumnParallelLinear( self.gate_up_proj = MergedColumnParallelLinear(
...@@ -186,13 +184,12 @@ class Glm4vVisionMLP(nn.Module): ...@@ -186,13 +184,12 @@ class Glm4vVisionMLP(nn.Module):
output_sizes=[hidden_features] * 2, output_sizes=[hidden_features] * 2,
bias=bias, bias=bias,
quant_config=quant_config, quant_config=quant_config,
) prefix=f"{prefix}.gate_up_proj")
self.down_proj = RowParallelLinear( self.down_proj = RowParallelLinear(hidden_features,
hidden_features,
in_features, in_features,
bias=bias, bias=bias,
quant_config=quant_config, quant_config=quant_config,
) prefix=f"{prefix}.down_proj")
self.act_fn = SiluAndMul() self.act_fn = SiluAndMul()
def forward(self, x: torch.Tensor): def forward(self, x: torch.Tensor):
...@@ -407,6 +404,7 @@ class Glm4vVisionBlock(nn.Module): ...@@ -407,6 +404,7 @@ class Glm4vVisionBlock(nn.Module):
mlp_hidden_dim, mlp_hidden_dim,
bias=False, bias=False,
quant_config=quant_config, quant_config=quant_config,
prefix=f"{prefix}.mlp",
) )
def forward( def forward(
...@@ -1278,7 +1276,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal, ...@@ -1278,7 +1276,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
self.visual = Glm4vVisionTransformer( self.visual = Glm4vVisionTransformer(
config.vision_config, config.vision_config,
norm_eps=getattr(config, "rms_norm_eps", 1e-5), norm_eps=getattr(config, "rms_norm_eps", 1e-5),
quant_config=self._maybe_ignore_quant_config(quant_config), quant_config=quant_config,
prefix=maybe_prefix(prefix, "visual"), prefix=maybe_prefix(prefix, "visual"),
) )
...@@ -1291,13 +1289,6 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal, ...@@ -1291,13 +1289,6 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
self.make_empty_intermediate_tensors = ( self.make_empty_intermediate_tensors = (
self.language_model.make_empty_intermediate_tensors) self.language_model.make_empty_intermediate_tensors)
def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
# GPTQ configs do not have a list of ignored modules, however AutoGPTQ
# seems to avoid vision encoder sections for some models.
if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
return None
return quant_config
def _validate_and_reshape_mm_tensor(self, mm_input: object, def _validate_and_reshape_mm_tensor(self, mm_input: object,
name: str) -> torch.Tensor: name: str) -> torch.Tensor:
if not isinstance(mm_input, (torch.Tensor, list)): if not isinstance(mm_input, (torch.Tensor, list)):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment