Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f83feccd
Unverified
Commit
f83feccd
authored
Nov 08, 2024
by
Michael Goin
Committed by
GitHub
Nov 09, 2024
Browse files
[Bugfix] Ignore GPTQ quantization of Qwen2-VL visual module (#10169)
Signed-off-by:
mgoin
<
michael@neuralmagic.com
>
parent
e0191a95
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
2 deletions
+12
-2
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+12
-2
No files found.
vllm/model_executor/models/qwen2_vl.py
View file @
f83feccd
...
...
@@ -51,7 +51,9 @@ from vllm.model_executor.layers.activation import QuickGELU
from
vllm.model_executor.layers.linear
import
(
ColumnParallelLinear
,
RowParallelLinear
)
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
(
GPTQConfig
,
GPTQMarlinConfig
,
QuantizationConfig
)
from
vllm.model_executor.layers.sampler
import
SamplerOutput
,
get_sampler
from
vllm.model_executor.layers.vocab_parallel_embedding
import
ParallelLMHead
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
...
...
@@ -982,7 +984,7 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
self
.
visual
=
Qwen2VisionTransformer
(
config
.
vision_config
,
norm_eps
=
getattr
(
config
,
"rms_norm_eps"
,
1e-6
),
quant_config
=
quant_config
,
quant_config
=
self
.
_maybe_ignore_quant_config
(
quant_config
)
,
prefix
=
"visual"
,
)
...
...
@@ -1008,6 +1010,14 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
make_empty_intermediate_tensors_factory
(
[
"hidden_states"
,
"residual"
],
config
.
hidden_size
))
def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
    """Return the quantization config to use, dropping GPTQ-style configs.

    GPTQ configs carry no list of ignored modules, yet AutoGPTQ appears to
    leave the vision encoder unquantized for some models. For those configs
    this returns ``None``; any other config is handed back unchanged.
    See: https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4

    Args:
        quant_config: The quantization config supplied for the model.

    Returns:
        ``None`` when ``quant_config`` is a ``GPTQConfig`` or
        ``GPTQMarlinConfig`` instance, otherwise ``quant_config`` itself.
    """
    # Config types whose checkpoints are treated as skipping quantization.
    gptq_config_types = (GPTQConfig, GPTQMarlinConfig)
    return (None if isinstance(quant_config, gptq_config_types) else
            quant_config)
def
_validate_and_reshape_mm_tensor
(
self
,
mm_input
:
Union
[
torch
.
Tensor
,
List
[
torch
.
Tensor
]],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment