Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2f2fcb31
Unverified
Commit
2f2fcb31
authored
Jul 04, 2025
by
Yuxuan Zhang
Committed by
GitHub
Jul 03, 2025
Browse files
[Misc] Remove _maybe_ignore_quant_config from GLM4.1v (#20432)
Signed-off-by:
zRzRzRzRzRzRzR
<
2448370773@qq.com
>
parent
1dba2c4e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
18 deletions
+9
-18
vllm/model_executor/models/glm4_1v.py
vllm/model_executor/models/glm4_1v.py
+9
-18
No files found.
vllm/model_executor/models/glm4_1v.py
View file @
2f2fcb31
...
...
@@ -55,9 +55,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear,
QKVParallelLinear
,
RowParallelLinear
)
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization.gptq
import
GPTQConfig
from
vllm.model_executor.layers.quantization.gptq_marlin
import
(
GPTQMarlinConfig
)
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.models.module_mapping
import
MultiModelKeys
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
...
...
@@ -179,6 +176,7 @@ class Glm4vVisionMLP(nn.Module):
hidden_features
:
int
,
bias
:
bool
=
False
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
):
super
().
__init__
()
self
.
gate_up_proj
=
MergedColumnParallelLinear
(
...
...
@@ -186,13 +184,12 @@ class Glm4vVisionMLP(nn.Module):
output_sizes
=
[
hidden_features
]
*
2
,
bias
=
bias
,
quant_config
=
quant_config
,
)
self
.
down_proj
=
RowParallelLinear
(
hidden_features
,
in_features
,
bias
=
bias
,
quant_config
=
quant_config
,
)
prefix
=
f
"
{
prefix
}
.gate_up_proj"
)
self
.
down_proj
=
RowParallelLinear
(
hidden_features
,
in_features
,
bias
=
bias
,
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.down_proj"
)
self
.
act_fn
=
SiluAndMul
()
def
forward
(
self
,
x
:
torch
.
Tensor
):
...
...
@@ -407,6 +404,7 @@ class Glm4vVisionBlock(nn.Module):
mlp_hidden_dim
,
bias
=
False
,
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.mlp"
,
)
def
forward
(
...
...
@@ -1278,7 +1276,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
self
.
visual
=
Glm4vVisionTransformer
(
config
.
vision_config
,
norm_eps
=
getattr
(
config
,
"rms_norm_eps"
,
1e-5
),
quant_config
=
self
.
_maybe_ignore_quant_config
(
quant_config
)
,
quant_config
=
quant_config
,
prefix
=
maybe_prefix
(
prefix
,
"visual"
),
)
...
...
@@ -1291,13 +1289,6 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
self
.
make_empty_intermediate_tensors
=
(
self
.
language_model
.
make_empty_intermediate_tensors
)
def
_maybe_ignore_quant_config
(
self
,
quant_config
:
QuantizationConfig
):
# GPTQ configs do not have a list of ignored modules, however AutoGPTQ
# seems to avoid vision encoder sections for some models.
if
isinstance
(
quant_config
,
(
GPTQConfig
,
GPTQMarlinConfig
)):
return
None
return
quant_config
def
_validate_and_reshape_mm_tensor
(
self
,
mm_input
:
object
,
name
:
str
)
->
torch
.
Tensor
:
if
not
isinstance
(
mm_input
,
(
torch
.
Tensor
,
list
)):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment