Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2fa2a50b
Unverified
Commit
2fa2a50b
authored
Apr 30, 2025
by
Isotr0py
Committed by
GitHub
Apr 29, 2025
Browse files
[Bugfix] Fix Minicpm-O-int4 GPTQ model inference (#17397)
Signed-off-by:
Isotr0py
<
2037008807@qq.com
>
parent
08e15def
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
36 additions
and
2 deletions
+36
-2
vllm/model_executor/models/minicpmo.py
vllm/model_executor/models/minicpmo.py
+35
-1
vllm/model_executor/models/minicpmv.py
vllm/model_executor/models/minicpmv.py
+1
-1
No files found.
vllm/model_executor/models/minicpmo.py
View file @
2fa2a50b
...
@@ -28,12 +28,16 @@ from typing import (Any, Callable, Literal, Optional, Set, Tuple, TypedDict,
...
@@ -28,12 +28,16 @@ from typing import (Any, Callable, Literal, Optional, Set, Tuple, TypedDict,
import
torch
import
torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
BatchFeature
from
transformers
import
BatchFeature
,
PretrainedConfig
from
transformers.modeling_outputs
import
BaseModelOutputWithPast
from
transformers.modeling_outputs
import
BaseModelOutputWithPast
from
transformers.models.whisper.modeling_whisper
import
(
from
transformers.models.whisper.modeling_whisper
import
(
ACT2FN
,
WHISPER_ATTENTION_CLASSES
,
WhisperConfig
,
WhisperEncoder
)
ACT2FN
,
WHISPER_ATTENTION_CLASSES
,
WhisperConfig
,
WhisperEncoder
)
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization.gptq
import
GPTQConfig
from
vllm.model_executor.layers.quantization.gptq_marlin
import
(
GPTQMarlinConfig
)
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalKwargs
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalKwargs
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
NestedTensors
)
NestedTensors
)
...
@@ -512,6 +516,36 @@ class MiniCPMO(MiniCPMV2_6):
...
@@ -512,6 +516,36 @@ class MiniCPMO(MiniCPMV2_6):
self
.
audio_token_id
=
None
self
.
audio_token_id
=
None
def _maybe_ignore_quant_config(
    self,
    quant_config: Optional[QuantizationConfig],
) -> Optional[QuantizationConfig]:
    """Drop GPTQ quantization configs; pass any other config through.

    GPTQ configs do not have a list of ignored modules, however AutoGPTQ
    seems to avoid vision encoder sections for some models.
    See: https://huggingface.co/openbmb/MiniCPM-o-2_6-int4

    Args:
        quant_config: The quantization config handed to a submodule
            initializer. May be ``None``, since the initializers that call
            this helper default their ``quant_config`` to ``None``.

    Returns:
        ``None`` when ``quant_config`` is a ``GPTQConfig`` or a
        ``GPTQMarlinConfig``; otherwise ``quant_config`` unchanged
        (which includes ``None``).
    """
    if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)):
        return None
    return quant_config
def init_vision_module(
    self,
    config: PretrainedConfig,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
) -> nn.Module:
    """Build the vision module, leaving it unquantized for GPTQ checkpoints.

    The incoming ``quant_config`` is first filtered through
    ``_maybe_ignore_quant_config`` and the result is forwarded to the
    parent class's ``init_vision_module``.

    Args:
        config: Model configuration forwarded to the parent implementation.
        quant_config: Requested quantization config, or ``None``.
        prefix: Name prefix forwarded to the parent implementation.

    Returns:
        Whatever the parent class's ``init_vision_module`` returns.
    """
    # MiniCPMO GPTQ models leave the vpm unquantized.
    vpm_quant_config = self._maybe_ignore_quant_config(quant_config)
    vision_module = super().init_vision_module(config, vpm_quant_config,
                                               prefix)
    return vision_module
def init_resampler(
    self,
    embed_dim: int,
    vision_dim: int,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
) -> nn.Module:
    """Build the resampler, leaving it unquantized for GPTQ checkpoints.

    The incoming ``quant_config`` is first filtered through
    ``_maybe_ignore_quant_config`` and the result is forwarded to the
    parent class's ``init_resampler``.

    Args:
        embed_dim: Forwarded unchanged to the parent implementation.
        vision_dim: Forwarded unchanged to the parent implementation.
        quant_config: Requested quantization config, or ``None``.
        prefix: Name prefix forwarded to the parent implementation.

    Returns:
        Whatever the parent class's ``init_resampler`` returns.
    """
    # MiniCPMO GPTQ models leave the resampler unquantized.
    resampler_quant_config = self._maybe_ignore_quant_config(quant_config)
    resampler = super().init_resampler(embed_dim, vision_dim,
                                       resampler_quant_config, prefix)
    return resampler
def
init_audio_module
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
init_audio_module
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
# Do not use parameters temporarily
# Do not use parameters temporarily
audio_config
=
self
.
config
.
audio_config
audio_config
=
self
.
config
.
audio_config
...
...
vllm/model_executor/models/minicpmv.py
View file @
2fa2a50b
...
@@ -1181,7 +1181,7 @@ class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA):
...
@@ -1181,7 +1181,7 @@ class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA):
def
init_vision_module
(
def
init_vision_module
(
self
,
self
,
config
:
PretrainedConfig
,
config
:
PretrainedConfig
,
quant_config
:
Optional
[
QuantizationConfig
],
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
)
->
nn
.
Module
:
)
->
nn
.
Module
:
model
=
Idefics2VisionTransformer
(
config
.
vision_config
,
model
=
Idefics2VisionTransformer
(
config
.
vision_config
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment