Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ae5279a1
Unverified
Commit
ae5279a1
authored
Nov 03, 2024
by
Yongzao
Committed by
GitHub
Nov 02, 2024
Browse files
[torch.compile] Adding torch compile to vision-language models (#9946)
parent
1b73ab2a
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
21 additions
and
8 deletions
+21
-8
vllm/model_executor/models/llava_next.py
vllm/model_executor/models/llava_next.py
+7
-3
vllm/model_executor/models/minicpmv.py
vllm/model_executor/models/minicpmv.py
+6
-1
vllm/model_executor/models/molmo.py
vllm/model_executor/models/molmo.py
+8
-4
No files found.
vllm/model_executor/models/llava_next.py
View file @
ae5279a1
...
...
@@ -606,7 +606,6 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal,
:class:`LlavaNextImageInputs`
"""
if
intermediate_tensors
is
not
None
:
input_ids
=
None
inputs_embeds
=
None
else
:
image_input
=
self
.
_parse_and_validate_image_input
(
**
kwargs
)
...
...
@@ -618,9 +617,14 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal,
self
.
language_model
.
model
.
get_input_embeddings
,
lambda
_
:
self
.
_process_image_input
(
image_input
),
)
input_ids
=
None
else
:
inputs_embeds
=
None
inputs_embeds
=
self
.
language_model
.
model
.
get_input_embeddings
(
input_ids
)
# always pass the input via `inputs_embeds`
# to make sure the computation graph is consistent
# for `torch.compile` integration
input_ids
=
None
hidden_states
=
self
.
language_model
.
model
(
input_ids
,
positions
,
...
...
vllm/model_executor/models/minicpmv.py
View file @
ae5279a1
...
...
@@ -564,8 +564,13 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
vlm_embeddings
,
_
=
self
.
get_embedding
(
input_ids
,
image_inputs
)
# always pass the input via `inputs_embeds`
# to make sure the computation graph is consistent
# for `torch.compile` integration
input_ids
=
None
output
=
self
.
llm
(
input_ids
=
None
,
input_ids
=
input_ids
,
positions
=
positions
,
kv_caches
=
kv_caches
,
attn_metadata
=
attn_metadata
,
...
...
vllm/model_executor/models/molmo.py
View file @
ae5279a1
...
...
@@ -15,6 +15,7 @@ from transformers import PretrainedConfig
from
vllm.attention
import
Attention
,
AttentionMetadata
from
vllm.attention.selector
import
_Backend
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
MultiModalConfig
from
vllm.distributed
import
(
get_pp_group
,
get_tensor_model_parallel_rank
,
get_tensor_model_parallel_world_size
,
...
...
@@ -713,6 +714,7 @@ class MolmoVisionBackbone(nn.Module):
return
image_features
@
support_torch_compile
class
MolmoModel
(
nn
.
Module
):
def
__init__
(
...
...
@@ -1141,7 +1143,6 @@ class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
**
kwargs
:
object
,
)
->
SamplerOutput
:
if
intermediate_tensors
is
not
None
:
input_ids
=
None
inputs_embeds
=
None
else
:
image_input
=
self
.
_parse_and_validate_image_input
(
**
kwargs
)
...
...
@@ -1156,10 +1157,13 @@ class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
image_input
[
"image_input_idx"
],
image_input
[
"seq_len"
],
)
else
:
inputs_embeds
=
self
.
model
.
embed_tokens
(
input_ids
)
# always pass the input via `inputs_embeds`
# to make sure the computation graph is consistent
# for `torch.compile` integration
input_ids
=
None
else
:
inputs_embeds
=
None
hidden_states
=
self
.
model
(
input_ids
=
input_ids
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment